62,046
社区成员
发帖
与我相关
我的任务
分享
// Sample HTML mixing <a> and <img> tags, with upper/lower-case names and
// double-quoted / single-quoted attribute values.
string test = @"<IMG height=16 src=""/WebHHEmailSYS1/images/001001171164/up.gif"" width=16 border=0><A HREF=""/WebHHEmailSYS1/Email/001001171164/20101203053111028.rar"">20101203053111028.rar </A>
<img src=""dfsdfasdfas"">
<img src='dfsdfasdfas'>
<A HREF=""/WebHHEmailSYS1/Email/001001171164/20101203050738canvas.rar"">20101203050738canvas.rar </A> <IMG height=112 src=""/WebHHEmailSYS1/images/001001171164/image.jpg"" width=881 border=0>";

// One pass over both tag kinds:
//   (?<a>)            empty marker group; it only participates when the tag name is "a"
//   \s                the tag name must be followed by whitespace, so tags that merely
//                     start with "a" (<audio>, <abbr>, ...) are not treated as <a>
//   (['""]?) ... \1   optional opening quote and the identical closing quote
//   (?(a)...|...)     stores the value in "url" for <a> tags and in "img" for <img> tags
Regex reg = new Regex(@"(?i)<(?:(?<a>)a|img)\s[^>]*?(?:href|src)=(['""]?)(?(a)(?<url>[^'""\s>]+)|(?<img>[^'""\s>]+))\1[^>]*>");
MatchCollection mc = reg.Matches(test);

List<string> hrefList = new List<string>();
List<string> srcList = new List<string>();
foreach (Match m in mc)
{
    // Exactly one of the two named groups participates in each match.
    Group url = m.Groups["url"];
    if (url.Success)
    {
        hrefList.Add(url.Value);
    }
    else
    {
        srcList.Add(m.Groups["img"].Value);
    }
}

// Print for verification. The text is assembled first and appended to the control
// once, instead of re-assigning richTextBox2.Text on every iteration.
System.Text.StringBuilder output = new System.Text.StringBuilder();
output.Append("============href===========\n");
foreach (string s in hrefList)
{
    output.Append(s).Append("\n");
}
output.Append("============src===========\n");
foreach (string s in srcList)
{
    output.Append(s).Append("\n");
}
richTextBox2.Text += output.ToString();
/*---------Output---------
============href===========
/WebHHEmailSYS1/Email/001001171164/20101203053111028.rar
/WebHHEmailSYS1/Email/001001171164/20101203050738canvas.rar
============src===========
/WebHHEmailSYS1/images/001001171164/up.gif
dfsdfasdfas
dfsdfasdfas
/WebHHEmailSYS1/images/001001171164/image.jpg
*/
1：我的 html 里面有很多个 a、img 标签，获取出来是什么格式？
//取出来的就是a的链接 img的src,是个字符串
2:m.Groups["path"] 这个没有编号的样子 ? 比如, 所有 a的 一组, img的一组
//这个 m.Groups["path"] 是正则表达式里的命名分组，你可以看下过客兄的博客，里面有介绍
sb.Append(m.Groups["url"].Value + "<br/>");//这个是取a的所有链接href
sb.Append(m1.Groups["path"].Value + "<br/>");//这个是取img的所有src
//我原以为 a 和 img 是固定连在一起的，所以写在了一个正则里！现在改成分开取了！
// Extracts every <a href> URL and then every <img src> path from the HTML with two
// independent patterns, and writes them to the response, each followed by "<br/>".
StringBuilder sb = new StringBuilder();
string str = "<IMG height=16 src=\"/WebHHEmailSYS1/images/001001171164/up.gif\" width=16 border=0><A HREF=\"/WebHHEmailSYS1/Email/001001171164/20101203053111028.rar\">20101203053111028.rar </A>";

// (['""]?) ... \1 pairs the optional opening quote with the same closing quote.
// The value class excludes '>' so an unquoted value (href=foo>) cannot run past the
// end of the tag, and \s after the tag name keeps <abbr>, <audio>, ... from matching.
Regex reg = new Regex(@"(?i)<a\s[^>]*?href=(['""]?)(?<url>[^'""\s>]+)\1[^>]*?>");
MatchCollection match = reg.Matches(str);
foreach (Match m in match)
{
    sb.Append(m.Groups["url"].Value).Append("<br/>");
}

// Same shape for <img src=...>; the value is exposed as the named group "path".
Regex reg1 = new Regex(@"(?i)<img\s[^>]*?src=(['""]?)(?<path>[^'""\s>]+)\1[^>]*?>");
MatchCollection match1 = reg1.Matches(str);
foreach (Match m1 in match1)
{
    sb.Append(m1.Groups["path"].Value).Append("<br/>");
}

Response.Write(sb.ToString());
// Finds the src value of every <img> tag in yourHtml. The tag prefix and the closing
// quote are matched by zero-width lookarounds, so m.Value is the URL text itself.
//   - Inside a verbatim string (@"...") a double quote must be written as "".
//   - [^>]*? keeps the lookbehind inside a single tag.
// NOTE: the URL class [\w/:.] only admits word characters, '/', ':' and '.'; a quoted
// URL containing anything else (e.g. '-' or '?') will not match.
MatchCollection mc = Regex.Matches(yourHtml, @"(?i)(?<=<img\s[^>]*?src=(['""]?))(?<url>[\w/:.]+)(?=\1)");
foreach (Match m in mc)
{
    string url = m.Value; // the matched src URL; consume it here
}
// Pattern that captures the quoted href/src value of <img> and <a> tags in the named group "g":
//   (?<=<(img|a))     lookbehind: the match starts right after "<img" or "<a"
//   (href|src)=       the attribute of interest
//   (['""]) ... \3    mandatory opening quote and the same closing quote
//                     (\3 is the third unnamed group; unquoted values are not matched)
// NOTE(review): the pattern carries no (?i), so upper-case markup such as <IMG>/<A HREF>
// presumably requires RegexOptions.IgnoreCase at the call site - confirm.
@"(?<=<(img|a))[^<>]+?(href|src)=(['""])(?<g>[^'""]+)\3"
// Read the value from Groups["g"].
// Result:
/*
/WebHHEmailSYS1/Email/001001171164/20101203050738canvas.rar
/WebHHEmailSYS1/images/001001171164/image.jpg
*/
// Matches an <a href=...>text</a> that is followed by an <img src=...> with only text
// (no other tag) between them, and writes the href followed by "<br/>" and then the
// src to the response for each such pair.
StringBuilder sb = new StringBuilder();
string str = "<A HREF=\"/WebHHEmailSYS1/Email/001001171164/20101203050738canvas.rar\">20101203050738canvas.rar </A> <IMG height=112 src=\"/WebHHEmailSYS1/images/001001171164/image.jpg\" width=881 border=0>";

// Each attribute captures its own optional quote: \1 closes href, \2 closes src.
// (Closing src with \1 would reuse the href's quote and fail whenever the two
// attributes are quoted differently.) The value classes exclude '>' so an unquoted
// value cannot run past the end of its tag.
Regex reg = new Regex(@"(?i)<a\s[^>]*?href=(['""]?)(?<url>[^'""\s>]+)\1[^>]*?>[^>]*?</a>[^<>]*?<img\s[^>]*?src=(['""]?)(?<path>[^'""\s>]+)\2[^>]*?>");
MatchCollection match = reg.Matches(str);
foreach (Match m in match)
{
    sb.Append(m.Groups["url"].Value).Append("<br/>");
    sb.Append(m.Groups["path"].Value);
}

Response.Write(sb.ToString());
" 要转义
// Matches the quoted src value of an <img> tag when src is the first attribute:
// the lookbehind requires "<img", exactly one whitespace character, "src=" and an
// opening quote; the lookahead requires a quote right after the value.
const string imgSrcPattern = @"(?is)(?<=<img\ssrc=['""]).*?[^'""](?=['""])";
Regex reg = new Regex(imgSrcPattern);