62,265
社区成员
发帖
与我相关
我的任务
分享
/// <summary>
/// 清除HTML代码
/// </summary>
/// <param name="HTMLStr"></param>
/// <returns></returns>
public static string DelHtmlCode(string HTMLStr)
{
//方法一
return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", "");
}
public static string GetContentSummary(string content, int length, bool StripHTML)
{
string NowCheckLength = "";
if (length < content.Length)
{
NowCheckLength = "1";
}
if (string.IsNullOrEmpty(content) || length == 0)
return "";
if (StripHTML)
{
Regex re = new Regex("<[^>]*>");
content = re.Replace(content, "");
content = content.Replace(" ", "").Replace(" ", "");
if (content.Length <= length
)
return content;
else
CheckLeng = "1";
return content.Substring(0, length) + "……";
}
else
{
if (content.Length <= length)
return content;
int pos = 0, npos = 0, size = 0;
bool firststop = false, notr = false, noli = false;
StringBuilder sb = new StringBuilder();
while (true)
{
if (pos >= content.Length)
break;
string cur = content.Substring(pos, 1);
if (cur == "<")
{
string next = content.Substring(pos + 1, 3).ToLower();
if (next.IndexOf("p") == 0 && next.IndexOf("pre") != 0)
{
npos = content.IndexOf(">", pos) + 1;
}
else if (next.IndexOf("/p") == 0 && next.IndexOf("/pr") != 0)
{
npos = content.IndexOf(">", pos) + 1;
if (size < length)
sb.Append("<br/>");
}
else if (next.IndexOf("br") == 0)
{
npos = content.IndexOf(">", pos) + 1;
if (size < length)
sb.Append("<br/>");
}
else if (next.IndexOf("img") == 0)
{
npos = content.IndexOf(">", pos) + 1;
if (size < length)
{
sb.Append(content.Substring(pos, npos - pos));
size += npos - pos + 1;
}
}
else if (next.IndexOf("li") == 0 || next.IndexOf("/li") == 0)
{
npos = content.IndexOf(">", pos) + 1;
if (size < length)
{
sb.Append(content.Substring(pos, npos - pos));
}
else
{
if (!noli && next.IndexOf("/li") == 0)
{
sb.Append(content.Substring(pos, npos - pos));
noli = true;
}
}
}
else if (next.IndexOf("tr") == 0 || next.IndexOf("/tr") == 0)
{
npos = content.IndexOf(">", pos) + 1;
if (size < length)
{
sb.Append(content.Substring(pos, npos - pos));
}
else
{
if (!notr && next.IndexOf("/tr") == 0)
{
sb.Append(content.Substring(pos, npos - pos));
notr = true;
}
}
}
else if (next.IndexOf("td") == 0 || next.IndexOf("/td") == 0)
{
npos = content.IndexOf(">", pos) + 1;
if (size < length)
{
sb.Append(content.Substring(pos, npos - pos));
}
else
{
if (!notr)
{
sb.Append(content.Substring(pos, npos - pos));
}
}
}
else
{
npos = content.IndexOf(">", pos) + 1;
sb.Append(content.Substring(pos, npos - pos));
}
if (npos <= pos)
npos = pos + 1;
pos = npos;
}
else
{
if (size < length)
{
sb.Append(cur);
size++;
}
else
{
if (!firststop)
{
CheckLeng = "1";
//sb.Append("……");
firststop = true;
}
}
pos++;
}
}
string HTML = sb.ToString();
if (NoHTML(HTML).Length!=NoHTML(content).Length || CheckLeng == "1")
{
CheckLeng = "1";
HTML = HTML + "……";
}
return HTML;
}
}
public static string NoHTML(string Htmlstring)
{
//删除脚本
Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
//删除HTML
Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
Htmlstring.Replace("<", "");
Htmlstring.Replace(">", "");
Htmlstring.Replace("\r\n", "");
Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
return Htmlstring;
}
public static string DropHTML(string strHtml)
{
string[] aryReg ={
@"<script[^>]*?>.*?</script>",
@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""''])(\\[""''tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
@"([\r\n])[\s]+",
@"&(quot|#34);",
@"&(amp|#38);",
@"&(lt|#60);",
@"&(gt|#62);",
@"&(nbsp|#160);",
@"&(iexcl|#161);",
@"&(cent|#162);",
@"&(pound|#163);",
@"&(copy|#169);",
@"&#(\d+);",
@"-->",
@"<!--.*\n"
};
string[] aryRep = {
"",
"",
"",
"\"",
"&",
"<",
">",
" ",
"\xa1",//chr(161),
"\xa2",//chr(162),
"\xa3",//chr(163),
"\xa9",//chr(169),
"",
"\r\n",
""
};
string newReg = aryReg[0];
string strOutput = strHtml;
for (int i = 0; i < aryReg.Length; i++)
{
Regex regex = new Regex(aryReg[i], RegexOptions.IgnoreCase);
strOutput = regex.Replace(strOutput, aryRep[i]);
}
strOutput.Replace("<", "");
strOutput.Replace(">", "");
strOutput.Replace("\r\n", "");
return strOutput;
}
public static String delHtml(String inputString) {
String htmlStr = inputString; // 含html标签的字符串
String textStr = "";
java.util.regex.Pattern p_script;
java.util.regex.Matcher m_script;
java.util.regex.Pattern p_html;
java.util.regex.Matcher m_html;
try {
String regEx_html = "<[^>]+>"; // 定义HTML标签的正则表达式
String regEx_script = "<[\s]*?script[^>]*?>[\s\S]*?<[\s]*?\/[\s]*?script[\s]*?>"; // 定义script的正则表达式{或<script[^>]*?>[\s\S]*?<\/script>
p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
m_script = p_script.matcher(htmlStr);
htmlStr = m_script.replaceAll(""); // 过滤script标签
p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
m_html = p_html.matcher(htmlStr);
htmlStr = m_html.replaceAll(""); // 过滤html标签
textStr = htmlStr;
} catch (Exception e) {
System.err.println("Html2Text: " + e.getMessage());
}
return textStr;// 返回文本字符串
}