不知道这个能不能帮到你
设计思路:对比一个字符串A与关键字字符串B之间的关联关系.
情况1:完全相关,例如,'中国人民银行'同时包含关键字 人民,银行.
情况2:部分相关,例如,'中国人民银行'部分包含关键字 人民,银行,xx.
情况3:模糊相关,例如'中国人民银行'与关键字'x中,人民':关键字中的部分字符与字符串相关,部分则不相关.
使用方法
SELECT dbo.fn_GetSimilar(字符串, 关键字, 适配关键字数量)
参数说明:关键字为多个时用逗号隔开;适配关键字数量表示命中多少个关键字即算完全相关(例如查询关键字有10个,只要其中5个能被搜索到就算完全相关,则写5);如果是模糊查找,则写-1.
运行结果
SELECT dbo.fn_GetSimilar(N'中国人民银行',N'人民,银行',3)
1.0000
SELECT dbo.fn_GetSimilar(N'中国人民银行',N'人民,银行,xx',3)
0.6667
SELECT dbo.fn_GetSimilar(N'中国人民银行',N'x中,人民',-1)
0.7500
代码
ALTER FUNCTION [dbo].[fn_Split1]( @String nvarchar (max), @Delimiter nvarchar (10),@Type NVARCHAR(10) )
RETURNS @ValueTable TABLE ([Value] NVARCHAR(max),[id] int)
AS
-- Splits @String into rows of ([Value], [id]).
--   @Type = 'Delimiter': cut @String at every occurrence of @Delimiter
--                        (which may be longer than one character);
--                        [id] is the 1-based ordinal of the piece.
--   @Type = 'Length'   : @Delimiter carries a number; cut @String into pieces
--                        of that many characters; [id] is the 1-based start
--                        position of the piece. Pieces that compare equal to ''
--                        (empty or all spaces) are skipped.
-- Any other @Type, or a NULL @String, yields an empty table.
BEGIN
    DECLARE @Pos int,       -- position of the delimiter currently being consumed
            @Start int,     -- first character of the piece being extracted
            @DelimLen int,  -- delimiter length in characters
            @id int

    IF @Type='Delimiter'
    BEGIN
        -- DATALENGTH/2 rather than LEN: LEN ignores trailing spaces, so a
        -- single-space delimiter would otherwise report a length of 0.
        SET @DelimLen = DATALENGTH(@Delimiter) / 2
        SET @id = 1
        -- A trailing delimiter guarantees the last piece is terminated too.
        SET @String = @String + @Delimiter
        SET @Start = 1
        SET @Pos = CHARINDEX(@Delimiter, @String, @Start)
        WHILE (@Pos > 0)
        BEGIN
            INSERT INTO @ValueTable ([Value],[id])
            VALUES (SUBSTRING(@String, @Start, @Pos - @Start), @id)
            -- Skip the whole delimiter, not just its first character.
            SET @Start = @Pos + @DelimLen
            SET @Pos = CHARINDEX(@Delimiter, @String, @Start)
            SET @id = @id + 1
        END
    END

    IF @Type='Length'
    BEGIN
        DECLARE @len INT,@Length INT
        SET @Length = @Delimiter   -- implicit nvarchar -> int conversion
        SET @len = LEN(@String)
        SET @id = 1
        -- A piece size below 1 (or NULL) can never advance; return no rows.
        WHILE (@Length >= 1 AND @id <= @len)
        BEGIN
            INSERT INTO @ValueTable ([Value],[id])
            SELECT SUBSTRING(@String, @id, @Length), @id
            WHERE SUBSTRING(@String, @id, @Length) <> ''
            SET @id = @id + @Length
        END
    END
    RETURN
END
go
--DROP FUNCTION fn_GetSimilar
ALTER FUNCTION fn_GetSimilar
(@StrA As nVarchar(4000), /*string to score*/@StrB As nVarchar(255),/*comma-separated keywords*/@strength INT/*keywords required for a full match; -1 = fuzzy*/)
RETURNS DECIMAL(18,4)
AS
-- Scores how strongly @StrA relates to the keywords in @StrB (0 .. 1).
--   @strength >= 1 : share of the required keyword count found in @StrA as
--                    substrings, capped at 1. @strength is clamped to the
--                    number of keywords supplied.
--   @strength = -1 : fuzzy mode. For every non-empty keyword, the share of its
--                    characters that occur anywhere in @StrA; the result is the
--                    average over the keywords.
-- Returns NULL when no score is defined: @strength is 0 (after clamping),
-- @strength is below -1 or NULL, or fuzzy mode has no non-empty keyword.
BEGIN
    DECLARE @re DECIMAL(18,4),
            @lenSame DECIMAL(18,4),
            @lenB DECIMAL(18,4)      -- number of keywords in @StrB

    SET @lenB=(SELECT COUNT(*) FROM dbo.fn_Split1(@StrB,N',',N'Delimiter'))
    SET @strength=(CASE WHEN @strength>@lenB THEN @lenB ELSE @strength END)

    IF (@strength > 0)
    BEGIN
        -- Exact mode: keywords are used as given. Wildcard characters inside a
        -- keyword are bracket-escaped so LIKE treats them literally.
        SET @lenSame=CONVERT(DECIMAL(18,4),(
            SELECT COUNT(*)
            FROM dbo.fn_Split1(@StrB,N',',N'Delimiter') b
            WHERE @StrA LIKE N'%'
                + REPLACE(REPLACE(REPLACE(b.[Value], N'[', N'[[]'), N'%', N'[%]'), N'_', N'[_]')
                + N'%'))
        -- Finding at least the required number of keywords is a full match.
        SET @re=(CASE WHEN @lenSame>=@strength THEN 1 ELSE @lenSame/@strength END)
    END

    IF (@strength=-1)
    BEGIN
        DECLARE @BlurrySource TABLE (id int,VALUE nvarchar(50))       -- characters of @StrA
        DECLARE @BlurrySum TABLE (id int,VALUE nvarchar(255))         -- keywords
        DECLARE @Blurry TABLE (id int,VALUE nvarchar(50))             -- characters of the current keyword
        DECLARE @BlurryResulte TABLE (id int,VALUE DECIMAL(18,4))     -- score per keyword
        DECLARE @BlurryNumber INT,@length INT,@keyword nvarchar(255)

        INSERT INTO @BlurrySource (id,VALUE) SELECT id,Value FROM dbo.fn_Split1(@StrA,N'1',N'Length')
        INSERT INTO @BlurrySum (id,VALUE) SELECT id,Value FROM dbo.fn_Split1(@StrB,N',',N'Delimiter')

        -- Keyword ids are 1..n, so walk them from n down to 1.
        SET @BlurryNumber=(SELECT COUNT(*) FROM @BlurrySum)
        WHILE (@BlurryNumber>0)
        BEGIN
            SET @keyword=(SELECT VALUE FROM @BlurrySum WHERE id=@BlurryNumber)
            SET @length=LEN(@keyword)
            -- Empty keywords (e.g. from 'a,,b') carry no characters to compare
            -- and would divide by zero; leave them out of the average.
            IF (@length>0)
            BEGIN
                DELETE @Blurry
                INSERT INTO @Blurry (id,VALUE) SELECT id,Value FROM dbo.fn_Split1(@keyword,N'1',N'Length')
                -- Count keyword characters present in @StrA. EXISTS keeps a
                -- character repeated in @StrA from being counted more than once.
                SET @lenSame=CONVERT(DECIMAL(18,4),(
                    SELECT COUNT(*)
                    FROM @Blurry b
                    WHERE EXISTS (SELECT 1 FROM @BlurrySource a WHERE a.VALUE=b.VALUE)))/@length
                INSERT INTO @BlurryResulte (id,VALUE) VALUES (@BlurryNumber,@lenSame)
            END
            SET @BlurryNumber=@BlurryNumber-1
        END
        SET @re=(SELECT AVG(VALUE) FROM @BlurryResulte)
    END
    RETURN @re
END
go
试试 这个相似度函数
create FUNCTION fn_GetSimilar
(
@StrA As Varchar(255), -- string to compare
@StrB As Varchar(255) -- expected string
)
RETURNS int
AS
-- Returns a similarity percentage for @StrA against @StrB:
--   (rows of fn_splitstr(@StrA) found in @StrB
--    + rows of fn_splitstr(@StrB) found in @StrA) * 100 / (LEN(@StrA) + LEN(@StrB))
-- using integer division. Returns NULL when either input is NULL or when both
-- lengths are 0, because the ratio is undefined in those cases.
-- NOTE(review): dbo.fn_splitstr is defined elsewhere; presumably it yields one
-- row per character in a column named splitValue - confirm.
BEGIN
    DECLARE @Result int
    declare @lenA int
    declare @lenB int
    declare @lenSameA int
    declare @lenSameB int

    set @lenA=len(@StrA)
    set @lenB=len(@StrB)

    -- Pieces of A that occur in B
    select @lenSameA=count(*) from dbo.fn_splitstr(@strA) where @StrB like '%'+splitValue+'%'
    -- Pieces of B that occur in A
    select @lenSameB=count(*) from dbo.fn_splitstr(@StrB) where @StrA like '%'+splitValue+'%'

    -- NULLIF guards the divide-by-zero raised when both strings have length 0.
    set @Result =(@lenSameA+@lenSameB)*100 / NULLIF(@lenA+@lenB, 0)
    RETURN @Result
END
go
-- Example call. It must run in its own batch: CREATE FUNCTION has to be the
-- only statement in the batch that defines it.
select dbo.fn_getsimilar('asdf','fdsa')