用正则表达式去掉 HTML 标记
/// <summary>
/// 移除Html中的标记
/// </summary>
/// <param name="html">html字符串</param>
/// <returns>返回移除后的文本</returns>
public static string RemoveHtml(string html)
{
html = html.ToLower();
Regex regex1 = new Regex(@"<script[\s\S]+</script *>", RegexOptions.IgnoreCase);
Regex regex2 = new Regex(@" href *= *[\s\S]*script *:", RegexOptions.IgnoreCase);
Regex regex3 = new Regex(@" no[\s\S]*=", RegexOptions.IgnoreCase);
Regex regex4 = new Regex(@"<iframe[\s\S]+</iframe *>", RegexOptions.IgnoreCase);
Regex regex5 = new Regex(@"<frameset[\s\S]+</frameset *>", RegexOptions.IgnoreCase);
Regex regex6 = new Regex(@"\<img[^\>]+\>", RegexOptions.IgnoreCase);
Regex regex7 = new Regex(@"</p>", RegexOptions.IgnoreCase);
Regex regex8 = new Regex(@"<p>", RegexOptions.IgnoreCase);
Regex regex9 = new Regex(@"<[^>]*>", RegexOptions.IgnoreCase);
html = regex1.Replace(html, ""); //过滤<script></script>标记
html = regex2.Replace(html, ""); //过滤href=javascript: (<A>) 属性
html = regex3.Replace(html, " _disibledevent="); //过滤其它控件的on...事件
html = regex4.Replace(html, ""); //过滤iframe
html = regex5.Replace(html, ""); //过滤frameset
html = regex6.Replace(html, ""); //过滤frameset
html = regex7.Replace(html, ""); //过滤frameset
html = regex8.Replace(html, ""); //过滤frameset
html = regex9.Replace(html, "");
html = html.Replace(" ", "");
html = html.Replace("</strong>", "");
html = html.Replace("<strong>", "");
html = html.Replace(" ", "");
html = html.Replace(" ", "");