62,046
社区成员
发帖
与我相关
我的任务
分享
#region 移除字符串中的html标记
/// <summary>
/// Strips HTML tags from a string, drops double quotes and HTML-encodes what is left.
/// </summary>
/// <param name="html">The string to filter; may be null or empty.</param>
/// <returns>
/// The HTML-encoded text with every tag match and every double quote removed,
/// or an empty string when <paramref name="html"/> is null or empty.
/// </returns>
public static string RemoveHtml(string html)
{
    // Regex methods throw ArgumentNullException on null input, so bail out early.
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    // A single regex pass removes every tag; no per-match string.Replace loop is needed.
    string withoutTags = objReg.Replace(html, string.Empty);

    // Quotes are removed before encoding so they never appear as an entity in the result.
    return HttpUtility.HtmlEncode(withoutTags.Replace("\"", string.Empty));
}

// Singleline lets '.' match line breaks, so a tag whose attributes wrap onto
// another line is still recognised as one tag and removed.
private static readonly Regex objReg = new Regex("<.+?>", RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.Singleline);
// Sample markup; inside the verbatim literal each doubled quote stands for one '"'.
// Every double quote in the sample is then doubled in place.
String str = @"<a hrfe = "" >gafgafgdfgadfgadf </a>gfdagadfgadfgdafgdafgadfgdafgdafgdafgadfgdaf".Replace("\"", "\"\"");
// Print the first 20 characters of the quote-doubled text.
String head = str.Substring(0, 20);
Console.WriteLine(head);
/// <summary>
/// Filters HTML out of a string.
/// </summary>
/// <param name="pHtml">The text handed to TestReplace.ReplaceHtmlTag.</param>
/// <returns>The tag-stripped text after two follow-up Replace passes.</returns>
public static string ReplaceHTML(string pHtml)
{
    string stripped = TestReplace.ReplaceHtmlTag(pHtml);

    // NOTE(review): the two search literals below look identical in this copy of the
    // file; if they really are, the second pass is a no-op. Confirm whether a different
    // whitespace character was intended for one of them.
    string firstPass = stripped.Replace(" ", string.Empty);
    string secondPass = firstPass.Replace(" ", string.Empty);
    return secondPass;
}
/// <summary>
/// Regex-based helper that strips HTML markup from a string and decodes a small,
/// fixed set of character entities.
/// </summary>
public class TestReplace
{
    // Only static members are exposed; instances are never needed.
    private TestReplace() { }

    private const string NewLine = "\r\n";
    private const string LessThan = "<";
    private const string GreaterThan = ">";

    // The first two patterns (script, style) must swallow the element's whole body,
    // which can contain line breaks, so they alone are compiled with Singleline.
    private const int ContentBlockPatternCount = 2;

    // Compiled patterns, applied in order; entry i is replaced with Replacements[i].
    private static readonly IList<Regex> Rules = new List<Regex>();

    // Replacement text for each pattern, index-aligned with the pattern list in the
    // static constructor.
    private static readonly string[] Replacements = new string[]
    {
        "","","","","","","","\"","&","<",">","","\xa1","\xa2","\xa3","\xa9",""
    };

    static TestReplace()
    {
        string[] patterns = new string[]
        {
            @"<script.*?</script>",
            @"<style.*?</style>",
            @"<.*?>",
            @"<(.[^>]*)>",
            @"([\r\n])[\s]+",
            @"-->",
            @"<!--.*",
            @"&(quot|#34);",
            @"&(amp|#38);",
            @"&(lt|#60);",
            @"&(gt|#62);",
            @"&(nbsp|#160);",
            @"&(iexcl|#161);",
            @"&(cent|#162);",
            @"&(pound|#163);",
            @"&(copy|#169);",
            @"&#(\d+);"
        };

        for (int i = 0; i < patterns.Length; i++)
        {
            // Tag and entity names are matched case-insensitively so that markup such
            // as <SCRIPT> is removed together with its body instead of leaking it.
            RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;
            if (i < ContentBlockPatternCount)
            {
                options |= RegexOptions.Singleline;
            }

            Rules.Add(new Regex(patterns[i], options));
        }
    }

    /// <summary>
    /// Removes HTML markup from a string.
    /// </summary>
    /// <param name="Htmlstring">Source text that may contain HTML; may be null.</param>
    /// <returns>
    /// The text with CRLF pairs, tags, comments and any remaining angle brackets removed
    /// and the known entities decoded, or an empty string for null or empty input.
    /// </returns>
    public static string ReplaceHtmlTag(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        Htmlstring = Htmlstring.Replace(NewLine, string.Empty);

        for (int i = 0; i < Rules.Count; i++)
        {
            Htmlstring = Rules[i].Replace(Htmlstring, Replacements[i]);
        }

        // Entity decoding above can re-introduce angle brackets; drop whatever is left.
        Htmlstring = Htmlstring.Replace(LessThan, string.Empty);
        Htmlstring = Htmlstring.Replace(GreaterThan, string.Empty);
        Htmlstring = Htmlstring.Replace(NewLine, string.Empty);
        return Htmlstring;
    }
}
/// <summary>
/// Removes HTML markup from a string and returns the remaining text HTML-encoded.
/// </summary>
/// <param name="Htmlstring">Source text that may contain HTML; may be null.</param>
/// <returns>
/// The trimmed, HTML-encoded text with script blocks, tags, comments and leftover
/// angle brackets removed and the known entities decoded; an empty string for null
/// or empty input.
/// </returns>
public string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks together with their contents. Singleline lets the body
    // span line breaks.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove the remaining tags. The patterns carry no leading space, so a tag is
    // matched wherever it appears, not only after a blank.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the known character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // Strings are immutable: the results must be assigned back or the calls do nothing.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // HttpUtility performs the same encoding without needing a current HTTP request.
    return HttpUtility.HtmlEncode(Htmlstring).Trim();
}