高手请进!求 C# 去除字符串中 HTML 标记的正则表达式

liuhaixun 2008-07-14 03:21:51

如题,求高手指点。

...全文

156 5 打赏收藏转发到动态举报

写回复

用AI写文章

5 条回复

切换为时间正序

请发表友善的回复…

发表回复

liuhaixun 2008-07-14

打赏
举报

谢谢各位大哥,搞定了

dowoocn 2008-07-14

打赏
举报



#region 去除 htmlCode 中所有的HTML标签(包括标签中的属性) public static string StripHtml(string htmlCode)

		/// <summary>
		/// Removes every HTML tag (including the attributes written inside the tag)
		/// from <paramref name="htmlCode"/>.
		/// </summary>
		/// <param name="htmlCode">A string that may contain HTML markup.</param>
		/// <returns>
		/// The input with each <c>&lt;...&gt;</c> span deleted; text between tags is kept.
		/// An empty string is returned when the input is null or empty.
		/// </returns>
		public static string StripHtml(string htmlCode)
		{
			// A tag is '<', one or more characters other than '>', then '>'.
			const string tagPattern = @"<[^>]+>";
			const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Multiline;

			return string.IsNullOrEmpty(htmlCode)
				? string.Empty
				: Regex.Replace(htmlCode, tagPattern, string.Empty, options);
		}

		#endregion

pupo 2008-07-14

打赏
举报

/// <summary>
/// Removes script blocks (via <see cref="FilterScript"/>) and then every remaining
/// HTML tag from <paramref name="content"/>.
/// </summary>
/// <param name="content">Text that may contain HTML markup; may be null or empty.</param>
/// <returns>
/// The text with script blocks and all <c>&lt;...&gt;</c> tags removed.
/// A null or empty input is returned unchanged.
/// </returns>
public static string RemoveHtml(string content)
{
    // Guard up front: FilterScript hands a null input straight back, and
    // Regex.Replace throws ArgumentNullException when given a null input.
    if (string.IsNullOrEmpty(content))
    {
        return content;
    }

    string withoutScripts = FilterScript(content);

    // The pattern contains no letters, so no case-insensitive option is needed.
    return Regex.Replace(withoutScripts, @"<[^>]*>", string.Empty);
}

/// <summary>
/// Removes <c>&lt;script&gt;</c> elements, together with their contents, from
/// <paramref name="content"/>, and then removes any unpaired opening or closing
/// script tags that are left over.
/// </summary>
/// <param name="content">Text that may contain HTML markup; may be null or empty.</param>
/// <returns>
/// The text without script elements. A null or empty input is returned unchanged.
/// </returns>
public static string FilterScript(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return content;
    }

    // The lazy [\s\S]*? stops at the NEAREST closing tag, so text sitting between
    // two separate script blocks is kept. \b stops the pattern from matching
    // longer tag names that merely start with "script".
    const string scriptBlock = @"<script\b[^>]*>[\s\S]*?</script\b[^>]*>";

    // Opening or closing script tags that have no partner (e.g. truncated markup).
    const string orphanScriptTag = @"</?script\b[^>]*>";

    content = Regex.Replace(content, scriptBlock, string.Empty, RegexOptions.IgnoreCase);
    return Regex.Replace(content, orphanScriptTag, string.Empty, RegexOptions.IgnoreCase);
}