111,098
社区成员




/// <summary>
/// Word filter that uses per-character bit masks to reject most candidate
/// substrings before paying for a hash-set lookup.
/// Words may be at most 16 characters long because every mask is 16 bits wide.
/// </summary>
public class BadWordsFilter2
{
    // Exact set of registered words; the final authority for a match.
    private HashSet<string> hash = new HashSet<string>();
    // fastCheck[c]: bit i is set when some word has character c at zero-based position i.
    private ushort[] fastCheck = new ushort[char.MaxValue + 1];
    // startLength[c]: bit (n - 1) is set when some word of length n starts with c.
    private ushort[] startLength = new ushort[char.MaxValue + 1];
    // endLength[c]: bit (n - 1) is set when some word of length n ends with c.
    private ushort[] endLength = new ushort[char.MaxValue + 1];
    private int maxWordLength = 0;
    private int minWordLength = int.MaxValue;

    /// <summary>
    /// Registers a word to be detected.
    /// </summary>
    /// <param name="word">The word to add; 1 to 16 characters.</param>
    /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="word"/> is empty or longer than 16 characters.</exception>
    public void AddKey(string word)
    {
        if (word == null)
        {
            throw new ArgumentNullException("word");
        }
        if (word.Length == 0)
        {
            // Reject before touching any state: an empty word has no first/last character.
            throw new ArgumentException("word must not be empty", "word");
        }
        if (word.Length > 16)
        {
            throw new ArgumentException("参数最大16个字符");
        }
        maxWordLength = Math.Max(maxWordLength, word.Length);
        minWordLength = Math.Min(minWordLength, word.Length);
        // Record every position (0-15) at which each character occurs.
        for (int i = 0; i < word.Length; i++)
        {
            // Must be a 16-bit cast: a byte cast would silently drop positions 8-15.
            fastCheck[word[i]] |= (ushort)(1 << i);
        }
        ushort mask = (ushort)(1 << (word.Length - 1));
        // Lengths of the words that start with this character.
        startLength[word[0]] |= mask;
        // Lengths of the words that end with this character.
        endLength[word[word.Length - 1]] |= mask;
        hash.Add(word);
    }

    /// <summary>
    /// Reports whether <paramref name="text"/> contains any registered word.
    /// </summary>
    /// <param name="text">The text to scan; null or empty yields false.</param>
    /// <returns>true when at least one registered word occurs in the text.</returns>
    public bool HasBadWord(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }
        for (int index = 0; index < text.Length; index++)
        {
            if (MatchLengthAt(text, index) > 0)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Finds the first registered word occurring in <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to scan; null or empty yields an empty string.</param>
    /// <returns>The first match, or <see cref="string.Empty"/> when there is none.</returns>
    public string FindOne(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }
        for (int index = 0; index < text.Length; index++)
        {
            int length = MatchLengthAt(text, index);
            if (length > 0)
            {
                return text.Substring(index, length);
            }
        }
        return string.Empty;
    }

    /// <summary>
    /// Lazily enumerates the registered words found in <paramref name="text"/>,
    /// left to right and without overlap.
    /// </summary>
    /// <param name="text">The text to scan; null or empty yields no items.</param>
    /// <returns>Each match in order of appearance.</returns>
    public IEnumerable<string> FindAll(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            yield break;
        }
        for (int index = 0; index < text.Length; index++)
        {
            int length = MatchLengthAt(text, index);
            if (length > 0)
            {
                yield return text.Substring(index, length);
                // Resume right after the match so results never overlap.
                index += length - 1;
            }
        }
    }

    // Returns the length of the shortest registered word starting at text[index], or 0 if none does.
    private int MatchLengthAt(string text, int index)
    {
        // Not enough characters left for even the shortest word
        // (also covers the "no words registered" case, where minWordLength is int.MaxValue).
        if (text.Length - index < minWordLength)
        {
            return 0;
        }
        int maxIndex = Math.Min(maxWordLength + index, text.Length);
        char begin = text[index];
        int count = 0;
        for (int j = index; j < maxIndex; j++)
        {
            char current = text[j];
            ushort mask = (ushort)(1 << count);
            if ((fastCheck[current] & mask) == 0)
            {
                // No word has this character at this offset, so nothing starting at
                // 'index' can match. A word may still start at index + 1, so the
                // caller must not skip ahead.
                return 0;
            }
            ++count;
            // 'mask' now doubles as the length bit for a candidate of 'count' characters.
            if ((startLength[begin] & mask) != 0
                && (endLength[current] & mask) != 0
                && hash.Contains(text.Substring(index, count)))
            {
                return count;
            }
        }
        return 0;
    }
}
using System;
using System.Linq;
using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
    /// <summary>
    /// Demo program: replaces every listed phrase found in a sample input
    /// with the note configured for that phrase and prints the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the phrase-to-note table from an inline rule list, applies it
        /// to the sample input with a single regex pass, and writes the output
        /// to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // One rule per line in the form "phrase|replacement".
            var a_txt = @"尼玛的|{ 编者按:[你太坏了]}
我就太阳了|{编者按:[不要这样子]}
F.U.C.K|{[别骂人,这样真的不好。]}
去si吧|{[你确定要这样吗?]}
你了不起|{[好吧,败给你了。一会带你看星星!]}";
            var input = "尼玛的的的的我我我我就太阳了了了你了不起起起";
            // The line breaks inside the verbatim literal follow the source file's
            // own line endings, so accept both CRLF and LF.
            var separator = new[]
            {
                "\r\n",
                "\n"
            };
            var dict = a_txt
                .Split(separator, StringSplitOptions.RemoveEmptyEntries)
                // Split on the first '|' only so a replacement may itself contain '|'.
                .Select(s => s.Split(new[] { '|' }, 2))
                // Ignore malformed lines that have no separator.
                .Where(s => s.Length == 2)
                .ToDictionary(
                    s => s[0],
                    s => s[1]);
            // Escape each key so metacharacters (the '.' in "F.U.C.K") match literally;
            // otherwise the regex can match text that is not a dictionary key and the
            // lookup below throws. Longer keys go first so a key that is a prefix of
            // another cannot shadow it.
            var regex = string.Join(
                "|",
                dict.Keys
                    .OrderByDescending(k => k.Length)
                    .Select(k => Regex.Escape(k)));
            var output = Regex.Replace(input, regex, m => dict[m.Value]);
            Console.WriteLine(output);
        }
    }
}
// Phrase-to-note lookup table, seeded with a single entry.
Dictionary<string, string> dic = new Dictionary<string, string>
{
    { "尼玛的", "编者按:[你太坏了]" }
};
// Read the entry back through the indexer and print its note.
Console.WriteLine(dic["尼玛的"]);
//foreach(var kv in dic)