110,561
社区成员
发帖
与我相关
我的任务
分享
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>
...(省略)
<ul class="shop-list">
<li>
<h4>
<a target="_blank" href="http://item.taobao.com/auction/item_detail-0db1-91ea75432fd891a8e003b4a984c92d44.htm">
<<中文版3ds Max 8家居效果图设计完全攻略>>随书光盘
</a>
</h4>
<div class="item">
<div class="pic"> <a title="<<中文版3ds Max 8家居效果图设计完全攻略>>随书光盘" href="http://item.taobao.com/auction/item_detail-0db1-91ea75432fd891a8e003b4a984c92d44.htm" target="_blank">
<img src="http://img03.taobaocdn.com/bao/uploaded/i3/20071205/911/T1z50aXmY0gky9Tz.7_065338.jpg_sum.jpg"/>
</a>
</div>
</div>
</li>
</ul>
...(省略)
// Step 1: find the first <ul class="shop-list"> ... </ul> whose content has no nested <ul>.
// Step 2: inside it, read title / url / src from the first <a title href><img src></a> item.
Regex regUl = new Regex(
    @"<ul\s+class=""shop-list"">(?>(?:(?!</?ul\b).)*)</ul>",
    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
Regex regCon = new Regex(
    @"<a\s+title=""(?<title>[^""]*)""\s+href=""(?<url>[^""]*)""[^>]*>\s*<img\s+src=""(?<src>[^""]*)""[^>]*>\s*</a>",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);
Match mUl = regUl.Match(yourStr);

// Match.Empty is a failed match, so nothing is printed when the list itself was not found.
Match firstItem = mUl.Success ? regCon.Match(mUl.Value) : Match.Empty;
if (firstItem.Success)
{
    // One line per captured group, in the order title, url, src.
    foreach (string groupName in new[] { "title", "url", "src" })
    {
        richTextBox2.Text += firstItem.Groups[groupName].Value + "\n";
    }
}
// Open the HTTP response body and wrap it in an XML reader.
// NOTE(review): resp, stream and reader are not closed within this fragment;
// whoever consumes `reader` afterwards is presumably responsible for that — confirm.
WebResponse resp = myWebRequest.GetResponse();
Stream stream = resp.GetResponseStream();
XmlTextReader reader = new XmlTextReader(stream)
{
    XmlResolver = null, // ignore the DTD
    WhitespaceHandling = WhitespaceHandling.None
};
<ul class="shop-list">
<li>
<h4>
<a target="_blank" href="http://item.taobao.com/auction/item_detail-0db1-91ea75432fd891a8e003b4a984c92d44.htm">
<<中文版3ds Max 8家居效果图设计完全攻略>>随书光盘
</a>
</h4>
<div class="item">
<div class="pic"> <a title="<<中文版3ds Max 8家居效果图设计完全攻略>>随书光盘" href="http://item.taobao.com/auction/item_detail-0db1-91ea75432fd891a8e003b4a984c92d44.htm" target="_blank">
<img src="http://img03.taobaocdn.com/bao/uploaded/i3/20071205/911/T1z50aXmY0gky9Tz.7_065338.jpg_sum.jpg"/>
</a>
</div>
</div>
</li>
</ul>
// Extract the whole <ul class="shop-list"> element from htmlsrc, using a balancing
// group ("o") so that nested <ul>...</ul> pairs stay inside the match, then load the
// fragment into an XmlDocument so it can be queried with XPath.
Regex reg = new Regex(@"<ul[^>]*?class=""shop-list""[^>]*>((?>(?<o><ul[^>]*>)|(?<-o></ul>)|(?:(?!</?ul)[\s\S]))*)(?(o)(?!))</ul>");
Match m = reg.Match(htmlsrc);
string s = "";
if (m.Success)
{
    s = m.Groups[0].Value;
}

XmlDocument xm = new XmlDocument();
// LoadXml throws on an empty string, so only load when the list was actually found.
// NOTE(review): LoadXml also requires well-formed XML; markup with unescaped '<'
// in text or attribute values (e.g. a title starting with "<<") will be rejected
// with an XmlException and needs escaping or an HTML-tolerant parser first.
if (s.Length > 0)
{
    xm.LoadXml(s);
}

//
// From here on the usual XmlDocument methods can be used to read values.
//
// The XPath can carry your own filter conditions. In the posted markup each <div>
// sits under <li>, so the path goes through li.
XmlNodeList NodeList = xm.SelectNodes("ul/li/div");
foreach (XmlNode xn in NodeList)
{
    // Store the node's data in the database, an array, etc.
}
// Collect every <a title href><img src></a> item in yourStr and print its
// title, link and image URL, one value per line.
string itemPattern = @"<a\s+title=""(?<title>[^""]*)""\s+href=""(?<url>[^""]*)""[^>]*>\s*<img\s+src=""(?<src>[^""]*)""[^>]*>\s*</a>";
MatchCollection mc = Regex.Matches(yourStr, itemPattern, RegexOptions.IgnoreCase);
foreach (Match item in mc)
{
    // Same output order for every item: title, url, src.
    foreach (string groupName in new[] { "title", "url", "src" })
    {
        richTextBox2.Text += item.Groups[groupName].Value + "\n";
    }
}