茗香淡然 2015年09月30日
请教!!C#敏感字替换特定字符串

现在过滤敏感字的算法很多,五花八门,但大多不是把敏感字替换成‘*’,就是直接删除了事;而我现在想要的是把敏感字替换成特定的字符串。
  a.txt 中设定的关键字替换:

尼玛的{ 编者按:[你太坏了]}
我就太阳了{编者按:[不要这样子]}
F.U.C.K{[别骂人,这样真的不好。]}
去si吧{[你确定要这样吗?]}
你了不起{[好吧,败给你了。一会带你看星星!]}

  现在我想把每行中‘{’前面的部分定义为[敏感字],然后把它替换成花括号‘{}’中的字符串。

  有没有类似于二维数组这样的存储效果:
// Jagged array: each row pairs a sensitive word (column 0) with the text that
// should replace it (column 1). The outer array and every row must be allocated
// before the cells can be assigned.
string[][] array = new string[2][];
array[0] = new string[2];
array[1] = new string[2];
array[0][0] = "尼玛的";
array[0][1] = "编者按:[你太坏了]";
array[1][0] = "我就太阳了";
array[1][1] = "编者按:[不要这样子]";

一碰到array[i][0]的敏感字,就替换为array[i][1]中的字符串。

http://www.cnblogs.com/yeerh/archive/2011/10/20/2219035.html 优化的算法

/// <summary>
/// Bad-word filter that screens candidate substrings with per-character bit masks
/// before confirming them with a hash-set lookup.
/// </summary>
/// <remarks>
/// Words are limited to 16 characters because each character position (and each
/// word length) is tracked as one bit of a 16-bit mask.
/// </remarks>
public class BadWordsFilter2
{
    /// <summary>Longest supported word: one mask bit per character position.</summary>
    private const int MaxSupportedLength = 16;

    /// <summary>Exact set of registered words; the final authority for a match.</summary>
    private readonly HashSet<string> hash = new HashSet<string>();

    /// <summary>fastCheck[c] has bit i set when some word has character c at position i.</summary>
    private readonly ushort[] fastCheck = new ushort[char.MaxValue + 1];

    /// <summary>startLength[c] has bit (n - 1) set when some word of length n starts with c.</summary>
    private readonly ushort[] startLength = new ushort[char.MaxValue + 1];

    /// <summary>endLength[c] has bit (n - 1) set when some word of length n ends with c.</summary>
    private readonly ushort[] endLength = new ushort[char.MaxValue + 1];

    /// <summary>Length of the longest registered word (0 while no word is registered).</summary>
    private int maxWordLength = 0;

    /// <summary>Length of the shortest registered word (int.MaxValue while no word is registered).</summary>
    private int minWordLength = int.MaxValue;

    /// <summary>
    /// Registers a word to be detected.
    /// </summary>
    /// <param name="word">The word to register; 1 to 16 characters.</param>
    /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="word"/> is empty or longer than 16 characters.
    /// </exception>
    public void AddKey(string word)
    {
        if (word == null)
        {
            throw new ArgumentNullException("word");
        }

        // Validate before touching any state so a rejected word leaves the filter unchanged.
        if (word.Length == 0)
        {
            throw new ArgumentException("word must not be empty", "word");
        }

        if (word.Length > MaxSupportedLength)
        {
            throw new ArgumentException("参数最大16个字符");
        }

        maxWordLength = Math.Max(maxWordLength, word.Length);
        minWordLength = Math.Min(minWordLength, word.Length);

        // Record the position (0-15) at which each character occurs. The cast must be
        // to ushort: a byte cast would discard positions 8-15 and make words longer
        // than 8 characters unmatchable.
        for (int i = 0; i < word.Length; i++)
        {
            fastCheck[word[i]] |= (ushort)(1 << i);
        }

        // Bit (length - 1) marks "a word of this length starts/ends with this character".
        ushort lengthBit = (ushort)(1 << (word.Length - 1));
        startLength[word[0]] |= lengthBit;
        endLength[word[word.Length - 1]] |= lengthBit;

        hash.Add(word);
    }

    /// <summary>
    /// Tells whether the text contains at least one registered word.
    /// </summary>
    /// <param name="text">The text to scan; null or empty is treated as containing no word.</param>
    /// <returns>true if any registered word occurs in <paramref name="text"/>.</returns>
    public bool HasBadWord(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }

        // No word can start where fewer than minWordLength characters remain.
        int lastStart = text.Length - minWordLength;
        for (int index = 0; index <= lastStart; index++)
        {
            if (MatchLengthAt(text, index) > 0)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Finds the first registered word in the text.
    /// </summary>
    /// <param name="text">The text to scan; null or empty is treated as containing no word.</param>
    /// <returns>
    /// The word at the earliest start position (the shortest one if several start there),
    /// or <see cref="string.Empty"/> when nothing matches.
    /// </returns>
    public string FindOne(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }

        int lastStart = text.Length - minWordLength;
        for (int index = 0; index <= lastStart; index++)
        {
            int length = MatchLengthAt(text, index);
            if (length > 0)
            {
                return text.Substring(index, length);
            }
        }

        return string.Empty;
    }

    /// <summary>
    /// Lazily enumerates the registered words found in the text, left to right.
    /// </summary>
    /// <param name="text">The text to scan; null or empty yields no results.</param>
    /// <returns>
    /// Non-overlapping matches in order of position. At each start position the shortest
    /// matching word is reported and scanning resumes right after it.
    /// </returns>
    public IEnumerable<string> FindAll(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            yield break;
        }

        int lastStart = text.Length - minWordLength;
        for (int index = 0; index <= lastStart; index++)
        {
            int length = MatchLengthAt(text, index);
            if (length > 0)
            {
                yield return text.Substring(index, length);

                // Continue after the matched word (the loop increment adds the final +1).
                index += length - 1;
            }
        }
    }

    /// <summary>
    /// Returns the length of the shortest registered word that starts at
    /// <paramref name="index"/>, or 0 when no registered word starts there.
    /// </summary>
    private int MatchLengthAt(string text, int index)
    {
        int limit = index + Math.Min(maxWordLength, text.Length - index);
        char begin = text[index];
        int count = 0;

        for (int j = index; j < limit; j++)
        {
            char current = text[j];
            ushort mask = (ushort)(1 << count);

            // No word has this character at this offset, so nothing starting at index
            // can match. Only this start position is abandoned: later positions are
            // still tested by the caller, because skipping them could miss a word that
            // begins inside the rejected prefix.
            if ((fastCheck[current] & mask) == 0)
            {
                return 0;
            }

            ++count;

            // Pay for the substring and hash lookup only when some word of this length
            // starts with 'begin' and some word of this length ends with 'current'.
            if ((startLength[begin] & mask) != 0
                && (endLength[current] & mask) != 0
                && hash.Contains(text.Substring(index, count)))
            {
                return count;
            }
        }

        return 0;
    }
}
...全文
182 点赞 收藏 9
写回复
9 条回复

还没有回复,快来抢沙发~

发动态
发帖子
C#
创建于2007-09-28

8.4w+

社区成员

64.0w+

社区内容

.NET技术 C#
社区公告
暂无公告