您好,登錄後才能下訂單哦!
前些天,在Q群裡面看到有人請教這樣一個問題:在SQL Server中如何用SQL實現以下計算:
由圖得知,該問題是如何計算Jaccard係數。Jaccard係數,又稱為Jaccard相似係數(Jaccard similarity coefficient),用於比較有限樣本集之間的相似性與差異性,其定義為兩個集合的交集大小與並集大小之比:$J(A,B)=\frac{|A\cap B|}{|A\cup B|}$。Jaccard係數值越大,樣本相似度越高。
SQL Server通過intersect實現交集,union實現並集,如下:
intersect交集
計算交集代碼段如下:(1 intersect 0 = null,1 intersect 1 = 1,0 intersect 0 = 0)
union並集
計算並集代碼段如下:(1 union 0 返回 1 和 0 兩行,1 union 1 = 1,0 union 0 = 0)
原理簡介:
通過sys.columns表獲取表的字段名與字段ID,循環字段名,并根據字段名去取到表中對應的值:如字段名為A,那么取ID為1的值:select A from test where id=1,取ID為2的值:select A from test where id=2,
然後將2個值進行交集與並集,字段循環結束後,得到@str_intersect,@str_union,計算比值:len(@str_intersect)*1.0/len(@str_union)
最終結果如下:
全部代碼如下:(建表,見注釋部分)
-- =====================================================================
-- Pairwise Jaccard similarity between the rows of Test (SQL Server).
--
-- Every column of Test after the first one is treated as a 0/1 flag.
-- For each ordered pair of ids (id_1, id_2) the script counts
--   * the flag columns that are 1 in BOTH rows      (intersection), and
--   * the flag columns that are 1 in EITHER row     (union),
-- and stores intersection / union in Test_result, in row id = id_1 and
-- column _<id_2>_.  The ratio is NULL when the union is empty.
--
-- A second pass stores, for every row of Test, the concatenated names of
-- the flag columns whose value is 1 into test_str_column.str_columns.
--
-- The whole script is meant to run as a single batch.
-- =====================================================================

-- One-time setup (sample data and result tables). Uncomment to create.
--create table Test(id int,A INT,B INT,C INT,D INT,E INT,F INT)
--INSERT INTO Test SELECT 1,1,1,1,0,0,0
--INSERT INTO Test SELECT 2,0,1,0,1,0,1
--INSERT INTO Test SELECT 3,1,1,1,1,0,1
--INSERT INTO Test SELECT 4,1,1,1,0,1,0
--INSERT INTO Test SELECT 5,0,1,0,1,1,1
--INSERT INTO Test SELECT 6,0,0,1,0,1,1

--drop table Test_result
--create table Test_result(id int,_1_ numeric(10,4),_2_ numeric(10,4),_3_ numeric(10,4),_4_ numeric(10,4),_5_ numeric(10,4),_6_ numeric(10,4))
--insert into Test_result select 1,null,null,null,null,null,null
--insert into Test_result select 2,null,null,null,null,null,null
--insert into Test_result select 3,null,null,null,null,null,null
--insert into Test_result select 4,null,null,null,null,null,null
--insert into Test_result select 5,null,null,null,null,null,null
--insert into Test_result select 6,null,null,null,null,null,null

--create table test_str_column(id int,str_columns varchar(max))
--insert into test_str_column select 1,null
--insert into test_str_column select 2,null
--insert into test_str_column select 3,null
--insert into test_str_column select 4,null
--insert into test_str_column select 5,null
--insert into test_str_column select 6,null

-- Remove temp tables left behind in this session by an earlier, aborted run.
if object_id('tempdb..#test') is not null drop table #test
if object_id('tempdb..#a_union') is not null drop table #a_union
if object_id('tempdb..#a_intersect') is not null drop table #a_intersect
if object_id('tempdb..#temp_test_table') is not null drop table #temp_test_table
if object_id('tempdb..#tmp_test_columns') is not null drop table #tmp_test_columns
if object_id('tempdb..#tmp_rs') is not null drop table #tmp_rs

-----------------------------------------------------------------------
-- Part 1: Jaccard coefficient for every pair of ids -> Test_result
-----------------------------------------------------------------------

-- Flag columns of Test. column_id > 1 skips the first column, which the
-- setup DDL above defines as id.
select name, column_id
into #test
from sys.columns
where object_id = object_id('dbo.Test')
  and column_id > 1

declare @id_1 int = 0,
        @id_2 int = 0,
        @cnt_union int,                 -- columns that are 1 in either row
        @cnt_intersect int,             -- columns that are 1 in both rows
        @a_union int,
        @a_intersect int,
        @sql_union nvarchar(max),
        @sql_intersect nvarchar(max),
        @jaccard numeric(10,4)
declare @name sysname,
        @column_id int,
        @quoted_name nvarchar(258)

-- Landing tables for the single value returned by each dynamic statement.
create table #a_union(num int)
create table #a_intersect(num int)

declare @min_id int = 0,
        @max_id int = 0,
        @global_min_id int = 0,
        @global_max_id int = 0

select @min_id = min(id),
       @max_id = max(id),
       @global_min_id = min(id),
       @global_max_id = max(id)
from Test

-- Outer loop: id_1 walks min(id)..max(id); inner loop: id_2 does the same.
while (@min_id <= @max_id)
begin
    select @id_1 = @min_id,
           @id_2 = @global_min_id

    while (@id_2 <= @global_max_id)
    begin
        -- @column_id = 1 restarts the column walk just before the first flag column.
        select @cnt_union = 0,
               @cnt_intersect = 0,
               @column_id = 1

        while 1 = 1
        begin
            -- Next flag column in column_id order; stop when none is left.
            select top 1 @name = name,
                         @column_id = column_id
            from #test
            where column_id > @column_id
            order by column_id

            if @@rowcount = 0 break

            -- Bracket-quote the identifier before splicing it into dynamic SQL.
            set @quoted_name = quotename(@name)

            -- UNION of the two values: two distinct values (0 and 1) -> 1,
            -- a single value 0 (or no value at all) -> 0, otherwise -> 1.
            set @sql_union =
                  N'select case'
                + N' when (select count(1) from ('
                +        N'select ' + @quoted_name + N' from Test where id=' + convert(nvarchar(12), @id_1)
                +        N' union '
                +        N'select ' + @quoted_name + N' from Test where id=' + convert(nvarchar(12), @id_2)
                +     N') as a) > 1 then 1'
                + N' when isnull(('
                +        N'select ' + @quoted_name + N' from Test where id=' + convert(nvarchar(12), @id_1)
                +        N' union '
                +        N'select ' + @quoted_name + N' from Test where id=' + convert(nvarchar(12), @id_2)
                +     N'), 0) = 0 then 0'
                + N' else 1 end'

            insert into #a_union exec(@sql_union)
            select @a_union = num from #a_union
            delete from #a_union

            if (@a_union = 1)
                set @cnt_union += 1

            -- INTERSECT of the two values: differing values give no row
            -- (NULL -> 0), 0/0 gives 0, 1/1 gives 1.
            set @sql_intersect =
                  N'select case when isnull(('
                +        N'select ' + @quoted_name + N' from Test where id=' + convert(nvarchar(12), @id_1)
                +        N' intersect '
                +        N'select ' + @quoted_name + N' from Test where id=' + convert(nvarchar(12), @id_2)
                +     N'), 0) = 0 then 0 else 1 end'

            insert into #a_intersect exec(@sql_intersect)
            select @a_intersect = num from #a_intersect
            delete from #a_intersect

            if (@a_intersect = 1)
                set @cnt_intersect += 1
        end

        -- Columns are counted directly (not via the length of concatenated
        -- column names), so names longer than one character do not skew the
        -- ratio. NULLIF turns an empty union into NULL instead of raising a
        -- divide-by-zero error.
        set @jaccard = convert(numeric(10,4), @cnt_intersect * 1.0 / nullif(@cnt_union, 0))

        -- Test_result has one column per compared id (_1_ .. _6_); only the
        -- column matching @id_2 is overwritten, the others keep their value.
        update Test_result
        set _1_ = case when @id_2 = 1 then @jaccard else _1_ end,
            _2_ = case when @id_2 = 2 then @jaccard else _2_ end,
            _3_ = case when @id_2 = 3 then @jaccard else _3_ end,
            _4_ = case when @id_2 = 4 then @jaccard else _4_ end,
            _5_ = case when @id_2 = 5 then @jaccard else _5_ end,
            _6_ = case when @id_2 = 6 then @jaccard else _6_ end
        where id = @id_1
          and @id_2 between 1 and 6

        set @id_2 = @id_2 + 1
    end

    set @min_id = @min_id + 1
end

drop table #test
drop table #a_union
drop table #a_intersect

-----------------------------------------------------------------------
-- Part 2: names of the columns equal to 1, per row -> test_str_column
-----------------------------------------------------------------------

-- Work queue of ids still to process. NULL ids are left out because the
-- equality-based delete at the end of the loop could never remove them.
select id
into #temp_test_table
from Test
where id is not null

select name, column_id
into #tmp_test_columns
from sys.columns
where object_id = object_id('dbo.Test')
  and column_id > 1

declare @id int,
        @col_name sysname,
        @col_id int,
        @string_columns varchar(max),
        @sql_rs nvarchar(max),
        @num_1 int

create table #tmp_rs(num int)

while exists (select 1 from #temp_test_table)
begin
    select top 1 @id = id
    from #temp_test_table
    order by id

    -- @col_id = 1 restarts the column walk just before the first flag column.
    select @string_columns = '',
           @col_id = 1

    while 1 = 1
    begin
        select top 1 @col_name = name,
                     @col_id = column_id
        from #tmp_test_columns
        where column_id > @col_id
        order by column_id

        if @@rowcount = 0 break

        set @sql_rs = N'select ' + quotename(@col_name)
                    + N' from Test where id=' + convert(nvarchar(12), @id)

        insert into #tmp_rs exec(@sql_rs)
        select @num_1 = num from #tmp_rs
        delete from #tmp_rs

        if (@num_1 = 1)
            set @string_columns = @string_columns + @col_name
    end

    update test_str_column
    set str_columns = @string_columns
    where id = @id

    delete from #temp_test_table
    where id = @id
end

drop table #temp_test_table
drop table #tmp_test_columns
drop table #tmp_rs

-- Show the input and both results.
select * from Test order by id
select * from test_str_column order by id
select * from Test_result order by id
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,並提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。