求一个asp.net对敏感字库的过滤程序

indiana_zho 2009-08-08 11:00:23
一个短信内容检查程序,需要在数据库中建立一个敏感字库,然后对用户在页面输入的文字进行检查,如含有敏感字库中的文字,需对用户进行提示“您的输入中含有非法字符”。

这个该如何实现?? 请高手赐教,谢谢
...全文
594 20 打赏 收藏 转发到动态 举报
写回复
用AI写文章
20 条回复
切换为时间正序
请发表友善的回复…
发表回复
hSoso 2011-02-19
  • 打赏
  • 举报
回复
面试题目都会问到,最近河¥蟹比较大,所以很多网站的开发都会涉及到这些内容
kkbac 2010-03-22
  • 打赏
  • 举报
回复
用正则不行么? 怎么都喜欢循环?
artwl_cn 2009-08-10
  • 打赏
  • 举报
回复
好东东,留个名!学习了!
zhaoqiliang527 2009-08-10
  • 打赏
  • 举报
回复
有用,收藏啦!!!
wzp144650 2009-08-10
  • 打赏
  • 举报
回复
mark
mngzilin 2009-08-10
  • 打赏
  • 举报
回复
如果单独对textbox进行过滤的话,会麻烦点。7楼 11楼的方法都可以一试,但是我现在一般对全站进行过滤,可以对不同的url进行不同的过滤
showjim 2009-08-10
  • 打赏
  • 举报
回复

/// <summary>
/// Checks text against a fixed list of forbidden words.
/// Single-character words are tracked in a bitmap; longer words are kept in a hash set.
/// A second bitmap marks every character that occurs in any forbidden word, so the scan
/// only has to examine runs of such characters.
/// </summary>
public class badWordsFilter
{
    // Length of the longest forbidden word; 0 means no word was registered.
    private int maxLength = 0;
    // Bit per character: set when the character occurs in any forbidden word.
    private ulong[] charBits;
    // Bit per character: set when the character alone is a forbidden word.
    private ulong[] charWords;
    // Forbidden words of two or more characters.
    private HashSet<string> words;

    /// <summary>
    /// Builds the filter from the given word list. Null or empty entries and duplicates are ignored.
    /// </summary>
    /// <param name="badwords">Forbidden words; may be null or empty.</param>
    public badWordsFilter(string[] badwords)
    {
        if (badwords == null || badwords.Length == 0) return;

        foreach (string entry in badwords)
        {
            if (string.IsNullOrEmpty(entry)) continue;

            // Tables are created lazily, on the first usable entry.
            if (maxLength == 0)
            {
                charBits = new ulong[4096];
                charWords = new ulong[4096];
                words = new HashSet<string>();
            }

            if (entry.Length == 1)
            {
                char single = entry[0];
                if (charWords.getBit(single)) continue;

                if (maxLength == 0) maxLength = 1;
                charWords.setBit(single, true);
                charBits.setBit(single, true);
            }
            else
            {
                if (words.Contains(entry)) continue;

                if (entry.Length > maxLength) maxLength = entry.Length;
                words.Add(entry);
                foreach (char ch in entry) charBits.setBit(ch, true);
            }
        }
    }

    /// <summary>
    /// Reports whether the text contains at least one forbidden word.
    /// </summary>
    /// <param name="content">Text to inspect; null or empty yields false.</param>
    /// <returns>true when a forbidden word occurs in <paramref name="content"/>.</returns>
    public bool hasBadWord(string content)
    {
        if (string.IsNullOrEmpty(content) || maxLength == 0) return false;

        bool hasLongWords = maxLength != 1;
        int runStart = 0;

        for (int position = 0; position < content.Length; position++)
        {
            char current = content[position];
            if (charBits.getBit(current))
            {
                // The character belongs to some word; it may be a word on its own.
                if (charWords.getBit(current)) return true;
            }
            else
            {
                // A character outside every word ends the current run of candidates.
                if (hasLongWords && runHasWord(content, runStart, position)) return true;
                runStart = position + 1;
            }
        }

        // The trailing run is not closed by a separator, so check it here.
        return hasLongWords && runHasWord(content, runStart, content.Length);
    }

    // Tests every substring of length 2..maxLength inside [start, end) against the word set.
    private bool runHasWord(string content, int start, int end)
    {
        for (int from = start; from < end - 1; from++)
        {
            int longest = Math.Min(maxLength, end - from);
            for (int size = 2; size <= longest; size++)
            {
                if (words.Contains(content.Substring(from, size))) return true;
            }
        }
        return false;
    }
}
/// <summary>
/// Sets or clears one bit in a bitmap stored as 64-bit words.
/// The call is ignored when the array is null or empty, or when the index is negative
/// or falls beyond the end of the array.
/// </summary>
/// <param name="bits">Bitmap storage, 64 bits per element.</param>
/// <param name="bitIndex">Zero-based bit position.</param>
/// <param name="isBit">true to set the bit, false to clear it.</param>
public static void setBit(this ulong[] bits, long bitIndex, bool isBit)
{
    if (bits == null || bits.Length == 0 || bitIndex < 0) return;

    int slot = (int)(bitIndex >> 6);
    if (slot >= bits.Length) return;

    ulong mask = 1UL << (int)(bitIndex & 63);
    if (isBit)
    {
        bits[slot] |= mask;
    }
    else
    {
        bits[slot] &= ~mask;
    }
}
/// <summary>
/// Reads one bit from a bitmap stored as 64-bit words.
/// </summary>
/// <param name="bits">Bitmap storage, 64 bits per element.</param>
/// <param name="bitIndex">Zero-based bit position.</param>
/// <returns>
/// true when the bit is set; false when it is clear, the array is null or empty,
/// or the index is negative or beyond the end of the array.
/// </returns>
public static bool getBit(this ulong[] bits, long bitIndex)
{
    if (bits == null || bits.Length == 0 || bitIndex < 0) return false;

    int slot = (int)(bitIndex >> 6);
    if (slot >= bits.Length) return false;

    return ((bits[slot] >> (int)(bitIndex & 63)) & 1UL) == 1UL;
}
chenjianyong94 2009-08-10
  • 打赏
  • 举报
回复
只需要在数据提交到数据库之前,调用一下下面的方法,就可以了。c#语法.
/// <summary>
/// Removes script blocks, HTML comment markers, a fixed list of SQL/shell keywords and a few
/// special characters from the input, decodes a handful of HTML entities, and finally
/// HTML-encodes and trims the result.
/// </summary>
/// <remarks>
/// This is a best-effort blacklist applied in a single pass per pattern. It does not replace
/// parameterized SQL commands or context-aware output encoding.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML, script, SQL keywords or special characters; may be null.</param>
/// <returns>The cleaned, HTML-encoded and trimmed text; an empty string when the input is null.</returns>
public static string NoHTML(string Htmlstring)
{
    if (Htmlstring == null)
    {
        return "";
    }

    // Remove script blocks. Singleline lets '.' cross line breaks, so a script body that
    // spans several lines is removed as well.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Collapse line breaks followed by whitespace, then drop HTML comment markers.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the supported named/numeric entities, in this exact order.
    string[][] entities = new string[][]
    {
        new string[] { @"&(quot|#34);", "\"" },
        new string[] { @"&(amp|#38);", "&" },
        new string[] { @"&(lt|#60);", "<" },
        new string[] { @"&(gt|#62);", ">" },
        new string[] { @"&(nbsp|#160);", " " },
        new string[] { @"&(iexcl|#161);", "\u00A1" },
        new string[] { @"&(cent|#162);", "\u00A2" },
        new string[] { @"&(pound|#163);", "\u00A3" },
        new string[] { @"&(copy|#169);", "\u00A9" },
        // Any remaining numeric entity is dropped.
        new string[] { @"&#(\d+);", "" }
    };
    foreach (string[] entity in entities)
    {
        Htmlstring = Regex.Replace(Htmlstring, entity[0], entity[1], RegexOptions.IgnoreCase);
    }

    // Remove database/shell related keywords. "xp_cmdshell" is listed twice on purpose:
    // removing the keywords in between can join fragments into a new "xp_cmdshell".
    string[] keywords = new string[]
    {
        "xp_cmdshell",
        "delete from",
        "drop table",
        "truncate",
        "asc",
        "xp_cmdshell",
        "exec master",
        "net localgroup administrators",
        "net user"
    };
    foreach (string keyword in keywords)
    {
        Htmlstring = Regex.Replace(Htmlstring, keyword, "", RegexOptions.IgnoreCase);
    }

    // Remove special characters. Once every '*' is gone no "*/" can remain, so that
    // sequence needs no separate replacement.
    Htmlstring = Htmlstring.Replace("*", "");
    Htmlstring = Htmlstring.Replace(";", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // The static HttpUtility encoder does not need a current HTTP request, unlike
    // HttpContext.Current.Server, which is null outside a request.
    return System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();
}

}
杰子 2009-08-10
  • 打赏
  • 举报
回复
[Quote=引用 5 楼 mngzilin 的回复:]
如果在全站的页面中都进行过滤的话,在Global.asax中配置:
// C# code
/// <summary>
/// Rejects any POST request whose form values contain a blacklisted token.
/// Requests under the /manager/ directory are not checked.
/// </summary>
void Application_BeginRequest(object sender, EventArgs e)
{
    // Do not check the manager directory.
    if (Request.RawUrl.IndexOf("/manager/", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        return;
    }

    // Walk the POST parameters, skipping the view-state hidden field.
    for (int i = 0; i < Request.Form.Count; i++)
    {
        // Compare the field NAME (not its value) with "__VIEWSTATE".
        if (Request.Form.GetKey(i) == "__VIEWSTATE")
        {
            continue;
        }

        // Form values are already strings; IsDanger tolerates null.
        if (IsDanger(Request.Form[i]))
        {
            Response.Write("您提交的内容中含有非法字符,已经被拒绝.");
            Response.End();
        }
    }
}

/// <summary>
/// Tells whether the text contains one of the blacklisted tokens, ignoring case.
/// </summary>
/// <param name="InText">Text to inspect; null is treated as safe.</param>
/// <returns>true when a blacklisted token occurs in the text.</returns>
protected bool IsDanger(string InText)
{
    // Kept on one line: a line break inside the verbatim string would become part of the pattern.
    const string word = @"exec|insert|select|delete|update|master|truncate|char|declare|join|iframe|href|script|<|>|request";
    if (InText == null)
    {
        return false;
    }
    return Regex.IsMatch(InText, word, RegexOptions.IgnoreCase);
}
[/Quote]
顶,这种黑名单的方法很好的。楼主可以试试
indiana_zho 2009-08-10
  • 打赏
  • 举报
回复
[Quote=引用 5 楼 mngzilin 的回复:]
如果在全站的页面中都进行过滤的话,在Global.asax中配置:
// C# code
/// <summary>
/// Rejects any POST request whose form values contain a blacklisted token.
/// Requests under the /manager/ directory are not checked.
/// </summary>
void Application_BeginRequest(object sender, EventArgs e)
{
    // Do not check the manager directory.
    if (Request.RawUrl.IndexOf("/manager/", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        return;
    }

    // Walk the POST parameters, skipping the view-state hidden field.
    for (int i = 0; i < Request.Form.Count; i++)
    {
        // Compare the field NAME (not its value) with "__VIEWSTATE".
        if (Request.Form.GetKey(i) == "__VIEWSTATE")
        {
            continue;
        }

        // Form values are already strings; IsDanger tolerates null.
        if (IsDanger(Request.Form[i]))
        {
            Response.Write("您提交的内容中含有非法字符,已经被拒绝.");
            Response.End();
        }
    }
}

/// <summary>
/// Tells whether the text contains one of the blacklisted tokens, ignoring case.
/// </summary>
/// <param name="InText">Text to inspect; null is treated as safe.</param>
/// <returns>true when a blacklisted token occurs in the text.</returns>
protected bool IsDanger(string InText)
{
    // Kept on one line: a line break inside the verbatim string would become part of the pattern.
    const string word = @"exec|insert|select|delete|update|master|truncate|char|declare|join|iframe|href|script|<|>|request";
    if (InText == null)
    {
        return false;
    }
    return Regex.IsMatch(InText, word, RegexOptions.IgnoreCase);
}
[/Quote]

谢谢,觉得很有用,虽然还没有试验.
如果不是针对全局的话该怎么办呢,只是针对某个textbox,检查用户的输入中是不是含有敏感字库中的内容(包括中文)
shinf 2009-08-08
  • 打赏
  • 举报
回复
mark
gdjlc 2009-08-08
  • 打赏
  • 举报
回复
再度提升!.NET脏字过滤算法
http://www.cnblogs.com/xingd/archive/2008/02/01/1061800.html



再度改进,在脏字可能存在的情况下,例如出现了多个脏字前Length-1部分时,性能相比http://www.cnblogs.com/xingd/archive/2008/01/31/1060425.html中描述的又提升了300%~400%。

直接贴出全部代码了,通过新增的一个byte[char.MaxValue]和BitArray(char.MaxValue),减少了大量的Substring和GetHashCode的调用。耗的内存也不算多,除HashSet外,仅需要144k内存。

引用此文或者使用此代码请说明出处,谢谢,以便于我将来的更新。

2008-02-02修订:if (index > 0 || (fastCheck[text[index]] & 1) == 0) 应去掉index > 0的判断,这个优化考虑的不够成熟。感谢sumtec和灵感之源指出错误。避免最短匹配时,可以在 if (hash.Contains(sub)) 之后,可以加入判断 if ((fastLength[begin] >> Math.Min(j,7)) == 0),然后再return true。

2008-02-03修订:for循环内部的if ((fastCheck[current] & 1) == 0)应为if ((fastCheck[current] & 1) == 0 && count == j)。修正bug并加入大小写敏感后,效率降低1倍。

/// <summary>
/// Detects whether a text contains any word from a bad-word list.
/// Per-character lookup tables reject most positions cheaply, so a substring is only
/// extracted and hashed when the tables say a match is possible. Matching is case-sensitive.
/// </summary>
public class BadWordsFilter
{
    // One table slot per UTF-16 code unit, 0 through char.MaxValue inclusive.
    private const int CharTableSize = char.MaxValue + 1;

    // Bad words of two or more characters.
    private HashSet<string> hash = new HashSet<string>();
    // Bit i (0..6): the character occurs at position i of some word; bit 7: at position 7 or later.
    private byte[] fastCheck = new byte[CharTableSize];
    // Indexed by a word's first character: bit (length - 2, capped at 7) per multi-character word.
    private byte[] fastLength = new byte[CharTableSize];
    // Characters that are a complete bad word on their own.
    private BitArray charCheck = new BitArray(CharTableSize);
    // Characters that end some multi-character bad word.
    private BitArray endCheck = new BitArray(CharTableSize);
    private int maxWordLength = 0;
    private int minWordLength = int.MaxValue;

    public BadWordsFilter()
    {

    }

    /// <summary>
    /// Registers the bad words. May be called more than once; words accumulate.
    /// </summary>
    /// <param name="badwords">Words to register. A null array and null or empty entries are ignored.</param>
    public void Init(string[] badwords)
    {
        if (badwords == null)
        {
            return;
        }

        foreach (string word in badwords)
        {
            // An empty entry has no first/last character to index the tables with.
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            maxWordLength = Math.Max(maxWordLength, word.Length);
            minWordLength = Math.Min(minWordLength, word.Length);

            for (int i = 0; i < 7 && i < word.Length; i++)
            {
                fastCheck[word[i]] |= (byte)(1 << i);
            }

            for (int i = 7; i < word.Length; i++)
            {
                fastCheck[word[i]] |= 0x80;
            }

            if (word.Length == 1)
            {
                charCheck[word[0]] = true;
            }
            else
            {
                fastLength[word[0]] |= (byte)(1 << (Math.Min(7, word.Length - 2)));
                endCheck[word[word.Length - 1]] = true;

                hash.Add(word);
            }
        }
    }

    /// <summary>
    /// Not implemented.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public string Filter(string text, string mask)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Reports whether the text contains at least one registered bad word.
    /// </summary>
    /// <param name="text">Text to inspect; null or empty yields false.</param>
    /// <returns>true when a registered word occurs in <paramref name="text"/>.</returns>
    public bool HasBadWord(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }

        int index = 0;

        while (index < text.Length)
        {
            // Number of positions to advance after this candidate start.
            int count = 1;

            // Skip ahead only when the current character cannot start a word. Skipping
            // unconditionally for index > 0 would step over valid start characters.
            if ((fastCheck[text[index]] & 1) == 0)
            {
                while (index < text.Length - 1 && (fastCheck[text[++index]] & 1) == 0)
                {
                }
            }

            char begin = text[index];

            if (minWordLength == 1 && charCheck[begin])
            {
                return true;
            }

            // A word of length j + 1 needs j <= maxWordLength - 1 and must fit in the text.
            int lastOffset = Math.Min(maxWordLength - 1, text.Length - index - 1);

            for (int j = 1; j <= lastOffset; j++)
            {
                char current = text[index + j];

                // Extend the skip only over an unbroken run of characters that cannot start
                // a word; once a possible start character is seen, stop growing the skip.
                if ((fastCheck[current] & 1) == 0 && count == j)
                {
                    ++count;
                }

                // No word has this character at offset j, so no longer match starts here.
                if ((fastCheck[current] & (1 << Math.Min(j, 7))) == 0)
                {
                    break;
                }

                if (j + 1 >= minWordLength)
                {
                    if ((fastLength[begin] & (1 << Math.Min(j - 1, 7))) > 0 && endCheck[current])
                    {
                        string sub = text.Substring(index, j + 1);

                        if (hash.Contains(sub))
                        {
                            return true;
                        }
                    }
                }
            }

            index += count;
        }

        return false;
    }
}
IHandler 2009-08-08
  • 打赏
  • 举报
回复
[Quote=引用 3 楼 pinyu 的回复:]
我也借这个话题想问一句,他这个短信用楼上的方法肯定没问题,如果一篇长文章咋办?在一个for循环中反复indexof?
[/Quote]

干嘛要循环换?
mngzilin 2009-08-08
  • 打赏
  • 举报
回复
如果在全站的页面中都进行过滤的话,在Global.asax中配置:

/// <summary>
/// Rejects any POST request whose form values contain a blacklisted token.
/// Requests under the /manager/ directory are not checked.
/// </summary>
void Application_BeginRequest(object sender, EventArgs e)
{
    // Do not check the manager directory. An ordinal, case-insensitive search avoids
    // the culture-sensitive ToLower() call.
    if (Request.RawUrl.IndexOf("/manager/", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        return;
    }

    // Walk the POST parameters, skipping the view-state hidden field.
    for (int i = 0; i < Request.Form.Count; i++)
    {
        // Compare the field NAME with "__VIEWSTATE"; comparing the field value
        // would never skip the view-state field.
        if (Request.Form.GetKey(i) == "__VIEWSTATE")
        {
            continue;
        }

        // Form values are already strings; the value is passed as-is so a null
        // value does not throw (IsDanger is given the raw value).
        if (IsDanger(Request.Form[i]))
        {
            Response.Write("您提交的内容中含有非法字符,已经被拒绝.");
            Response.End();
        }
    }
}
/// <summary>
/// Tells whether the text contains one of the blacklisted tokens, ignoring case.
/// </summary>
/// <param name="InText">Text to inspect; null is treated as safe.</param>
/// <returns>true when a blacklisted token occurs in the text.</returns>
protected bool IsDanger(string InText)
{
    // Kept on one line: a line break inside the verbatim string becomes part of the
    // pattern, which made the "iframe" alternative match only after a newline.
    const string word = @"exec|insert|select|delete|update|master|truncate|char|declare|join|iframe|href|script|<|>|request";
    if (InText == null)
    {
        return false;
    }
    // IgnoreCase so that "SELECT" or "Script" cannot bypass the blacklist.
    return Regex.IsMatch(InText, word, RegexOptions.IgnoreCase);
}
nosuchtracter 2009-08-08
  • 打赏
  • 举报
回复
设置一个关键字库
提交的时候进行匹配
用正则表达式
pinyu 2009-08-08
  • 打赏
  • 举报
回复
我也借这个话题想问一句,他这个短信用楼上的方法肯定没问题,如果一篇长文章咋办?在一个for循环中反复indexof?
IHandler 2009-08-08
  • 打赏
  • 举报
回复
按你的要求,可以在用户输入的时候用 JavaScript 来验证
IHandler 2009-08-08
  • 打赏
  • 举报
回复
这个敏感词库可以自己定义
然后使用String.IndexOf("关键词")来判断

62,046

社区成员

发帖
与我相关
我的任务
社区描述
.NET技术交流专区
javascript云原生 企业社区
社区管理员
  • ASP.NET
  • .Net开发者社区
  • R小R
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告

.NET 社区是一个围绕开源 .NET 的开放、热情、创新、包容的技术社区。社区致力于为广大 .NET 爱好者提供一个良好的知识共享、协同互助的 .NET 技术交流环境。我们尊重不同意见,支持健康理性的辩论和互动,反对歧视和攻击。

希望和大家一起共同营造一个活跃、友好的社区氛围。

试试用AI创作助手写篇文章吧