62,046
社区成员
发帖
与我相关
我的任务
分享
<thead>
<tr>
<td class="table-product">商品名称</td>
<td class="table-price">
<a class="orderup" href="javascript:changeSortByIcon(1)" >
商品售价
</a>
</td>
<td class="table-stock">库存</td>
<td class="table-price">
<a class="orderdown" href="javascript:changeSortByIcon(4)" >
商品单价
</a>
</td>
<td class="table-type">保障类型</td>
</tr>
</thead>
<tbody>
<tr>
<td>
<a href="/commodity/getCommodity.htm?commodityId=10978&commodityTypeForUIURL=yuanbao"
class="listicon-guan"
>【点券全】200传奇世界元宝=179.52元</a>
<p>
商品种类:
元宝/点券<br />游戏/区/服:传奇世界/全区/全服
</p></td>
<td><span class="num">179.52</span>元
<a href="
/sales/getCommodityByPointSetp1.htm?commodityId=10978"
class="buy">立即购买</a>
</td>
<td> 1件
</td>
<td><span class="num">0.8976</span>元</td>
<td>
<span class="listicon-shandian" title="当您完成支付,我们将立即为您发货,无需任何等待">闪电发货</span><span class="listicon-guanfang" title="提供盛大游戏的官方自动商品验证及自动充值,免除您的一切顾虑">官方保障</span>
</td>
</tr>
<tr>
<td>
<a href="/commodity/getCommodity.htm?commodityId=491145&commodityTypeForUIURL=yuanbao"
class="listicon-guan"
>【慧慧宝宝】100传奇世界元宝=89.77元</a>
<p>
商品种类:
元宝/点券<br />游戏/区/服:传奇世界/全区/全服
</p></td>
<td><span class="num">89.77</span>元
<a href="
/sales/getCommodityByPointSetp1.htm?commodityId=491145"
class="buy">立即购买</a>
</td>
<td> 39件
</td>
<td><span class="num">0.8977</span>元</td>
<td>
<span class="listicon-shandian" title="当您完成支付,我们将立即为您发货,无需任何等待">闪电发货</span><span class="listicon-guanfang" title="提供盛大游戏的官方自动商品验证及自动充值,免除您的一切顾虑">官方保障</span>
</td>
</tr>
// Matches one product row of the listing table, starting at "<tr>", and exposes five named groups:
//   url    - the first link's href value (everything up to the closing quote)
//   name   - the text right after the opening <a ...> tag, up to the next '<'
//   price  - the text reached after skipping seven further '>'-terminated segments
//   number - the text reached after skipping five further '>'-terminated segments
//            (captured with [^<]+, so it may include surrounding whitespace)
//   single - the text reached after skipping three further '>'-terminated segments
// Each ([^>]+>) consumes everything up to and including the next '>', so the repeat counts
// {7}, {5} and {3} are tied to the exact tag layout of a row.
// NOTE(review): the counts presumably were tuned against the sample markup - re-verify them if the page layout changes.
Regex reg = new Regex(@"<tr>\s*<td>\s*<a\s*href=""(?<url>[^""]+)([^>]+>)(?<name>[^<]+)([^>]+>){7}(?<price>[^<]+)([^>]+>){5}(?<number>[^<]+)([^>]+>){3}(?<single>[^<]+)", RegexOptions.Compiled);
<html>
<head>
<title></title>
<script type="text/javascript">
/**
 * Walks every <tr> inside the element with id "tested" (the <tbody>) and writes,
 * for each row: the link href and link text from cell 1, the price from cell 2,
 * the raw inner HTML of cell 3, and the unit price from cell 4.
 *
 * Takes no arguments and returns nothing; its only effect is the document.write output.
 */
function gettable() {
    // First <a> in a cell: group 1 = href value, group 2 = link text.
    var linkPattern = /<a[^>]+?href=\"(.+?)\"[^>]+?>([^<]+?)<\/a>/;
    // A <span> value plus the text directly after it (e.g. the currency unit).
    var amountPattern = /<span[^>]+?>([^<]+?)<\/span>([^<\s]+)/;

    // "tested" is the id of the <tbody>.
    var rows = document.getElementById("tested").getElementsByTagName("tr");

    for (var j = 0; j < rows.length; j++) {
        var cells = rows[j].children;
        if (cells.length < 4) {
            // Not a product row (fewer cells than the four that are read below).
            continue;
        }

        // Reset per row so that a cell that fails to match never inherits the
        // value extracted from a previous row.
        var href = "";
        var linkText = "";
        var priceText = "";
        var unitPriceText = "";

        // Use the match result directly instead of the deprecated RegExp.$1/$2 statics.
        var linkMatch = linkPattern.exec(cells[0].innerHTML);
        if (linkMatch) {
            href = linkMatch[1];
            linkText = linkMatch[2];
        }

        var priceMatch = amountPattern.exec(cells[1].innerHTML);
        if (priceMatch) {
            priceText = priceMatch[1] + priceMatch[2];
        }

        // The stock cell is emitted as-is (inner HTML, whitespace included).
        var stockHtml = cells[2].innerHTML;

        var unitPriceMatch = amountPattern.exec(cells[3].innerHTML);
        if (unitPriceMatch) {
            unitPriceText = unitPriceMatch[1] + unitPriceMatch[2];
        }

        // NOTE(review): when called from the button's onclick (after the page has loaded),
        // document.write replaces the current document with this output - confirm that is intended.
        document.write(href + "<br/>" + linkText + "<br/>" + priceText + "<br/>" + stockHtml + "<br/>" + unitPriceText + "<br/><br/><br/><br/>");
    }
}
</script>
</head>
<body>
<input type="button" id="" onclick="gettable();" value="确定" />
<table>
<thead>
<tr>
<td class="table-product">
商品名称
</td>
<td class="table-price">
<a class="orderup" href="javascript:changeSortByIcon(1)">商品售价 </a>
</td>
<td class="table-stock">
库存
</td>
<td class="table-price">
<a class="orderdown" href="javascript:changeSortByIcon(4)">商品单价 </a>
</td>
<td class="table-type">
保障类型
</td>
</tr>
</thead>
<tbody id="tested">
<tr>
<td>
<a href="/commodity/getCommodity.htm?commodityId=10978&commodityTypeForUIURL=yuanbao"
class="listicon-guan">【点券全】200传奇世界元宝=179.52元</a>
<p>
商品种类: 元宝/点券<br />
游戏/区/服:传奇世界/全区/全服
</p>
</td>
<td>
<span class="num">179.52</span>元 <a href="
/sales/getCommodityByPointSetp1.htm?commodityId=10978" class="buy">
立即购买</a>
</td>
<td>
1件
</td>
<td>
<span class="num">0.8976</span>元
</td>
<td>
<span class="listicon-shandian" title="当您完成支付,我们将立即为您发货,无需任何等待">闪电发货</span><span
class="listicon-guanfang" title="提供盛大游戏的官方自动商品验证及自动充值,免除您的一切顾虑">官方保障</span>
</td>
</tr>
<tr>
<td>
<a href="/commodity/getCommodity.htm?commodityId=491145&commodityTypeForUIURL=yuanbao"
class="listicon-guan">【慧慧宝宝】100传奇世界元宝=89.77元</a>
<p>
商品种类: 元宝/点券<br />
游戏/区/服:传奇世界/全区/全服
</p>
</td>
<td>
<span class="num">89.77</span>元 <a href="
/sales/getCommodityByPointSetp1.htm?commodityId=491145" class="buy">
立即购买</a>
</td>
<td>
39件
</td>
<td>
<span class="num">0.8977</span>元
</td>
<td>
<span class="listicon-shandian" title="当您完成支付,我们将立即为您发货,无需任何等待">闪电发货</span><span
class="listicon-guanfang" title="提供盛大游戏的官方自动商品验证及自动充值,免除您的一切顾虑">官方保障</span>
</td>
</tr>
</tbody>
</table>
</body>
</html>
/// <summary>
/// Parses the embedded sample product table and returns the extracted fields as a flat list.
/// Every parsed row contributes five consecutive entries, in this order:
/// link URL, product name, sale price, stock quantity (trimmed), unit price (trimmed).
/// </summary>
/// <returns>A list holding five strings per successfully parsed table row.</returns>
private List<string> Match()
{
    string html = @"
<thead>
<tr>
<td class=""table-product"">商品名称</td>
<td class=""table-price""> <a class=""orderup"" href=""javascript:changeSortByIcon(1)"" >商品售价</a> </td>
<td class=""table-stock"">库存</td>
<td class=""table-price""> <a class=""orderdown"" href=""javascript:changeSortByIcon(4)"" > 商品单价 </a> </td>
<td class=""table-type"">保障类型</td>
</tr>
</thead>
<tbody>
<tr>
<td> <a href=""/commodity/getCommodity.htm?commodityId=10978&commodityTypeForUIURL=yuanbao"" class=""listicon-guan"" >【点券全】200传奇世界元宝=179.52元</a> <p> 商品种类:元宝/点券<br />游戏/区/服:传奇世界/全区/全服 </p></td>
<td><span class=""num"">179.52</span>元 <a href="" /sales/getCommodityByPointSetp1.htm?commodityId=10978"" class=""buy"">立即购买</a> </td>
<td> 1件 </td>
<td><span class=""num"">0.8976</span>元</td>
<td> <span class=""listicon-shandian"" title=""当您完成支付,我们将立即为您发货,无需任何等待"">闪电发货</span><span class=""listicon-guanfang"" title=""提供盛大游戏的官方自动商品验证及自动充值,免除您的一切顾虑"">官方保障</span> </td>
</tr>
<tr>
<td> <a href=""/commodity/getCommodity.htm?commodityId=491145&commodityTypeForUIURL=yuanbao"" class=""listicon-guan"" >【慧慧宝宝】100传奇世界元宝=89.77元</a> <p> 商品种类:元宝/点券<br />游戏/区/服:传奇世界/全区/全服 </p></td>
<td><span class=""num"">89.77</span>元 <a href="" /sales/getCommodityByPointSetp1.htm?commodityId=491145"" class=""buy"">立即购买</a> </td>
<td> 39件 </td> <td><span class=""num"">0.8977</span>元</td>
<td> <span class=""listicon-shandian"" title=""当您完成支付,我们将立即为您发货,无需任何等待"">闪电发货</span><span class=""listicon-guanfang"" title=""提供盛大游戏的官方自动商品验证及自动充值,免除您的一切顾虑"">官方保障</span> </td>
</tr>
</tbody>
";
    return ExtractProductRowFields(html);
}

/// <summary>
/// Scans the tbody section of <paramref name="html"/> row by row and collects the five fields of each row.
/// Scanning stops at the first row where an expected marker is missing or lies past the closing
/// tbody tag, so an incomplete row never contributes partial data.
/// </summary>
/// <param name="html">Markup containing the product table.</param>
/// <returns>Five strings per parsed row; empty when no tbody section is found.</returns>
private static List<string> ExtractProductRowFields(string html)
{
    const string bodyOpen = "<tbody>";
    List<string> list = new List<string>();

    int bodyStart = html.IndexOf(bodyOpen, System.StringComparison.Ordinal);
    if (bodyStart < 0)
    {
        return list;
    }

    int rowPos = bodyStart + bodyOpen.Length;
    int bodyEnd = html.IndexOf("</tbody>", rowPos, System.StringComparison.Ordinal);

    while (rowPos >= 0 && rowPos < bodyEnd)
    {
        int cursor = rowPos;
        string url, name, price, number, single;

        // Link: first href="..." inside the row's first cell.
        if (!TryReadSegment(html, ref cursor, bodyEnd, new string[] { "<td>", "href=\"" }, "\"", out url)
            // Product name: text between the end of the <a ...> tag and </a>.
            || !TryReadSegment(html, ref cursor, bodyEnd, new string[] { ">" }, "</a>", out name)
            // Sale price: text of the first element inside the next cell.
            || !TryReadSegment(html, ref cursor, bodyEnd, new string[] { "<td>", ">" }, "<", out price)
            // Quantity: whole content of the next cell.
            || !TryReadSegment(html, ref cursor, bodyEnd, new string[] { "<td>" }, "</td>", out number)
            // Unit price: text of the first element inside the next cell.
            || !TryReadSegment(html, ref cursor, bodyEnd, new string[] { "<td>", ">" }, "<", out single))
        {
            break;
        }

        list.Add(url);
        list.Add(name);
        list.Add(price);
        list.Add(number.Trim());
        list.Add(single.Trim());

        // Jump to the next row; -1 ends the loop.
        rowPos = html.IndexOf("<tr>", cursor, System.StringComparison.Ordinal);
    }

    return list;
}

/// <summary>
/// Starting at <paramref name="cursor"/>, skips past each opener in turn, then returns the text up to
/// (not including) <paramref name="closer"/>. On success the cursor is moved to the closer's index.
/// Uses ordinal comparison so the search does not depend on the current culture.
/// </summary>
/// <returns><c>false</c> when an opener or the closer is missing, or the closer lies beyond <paramref name="limit"/>.</returns>
private static bool TryReadSegment(string html, ref int cursor, int limit, string[] openers, string closer, out string value)
{
    value = null;
    int start = cursor;

    foreach (string opener in openers)
    {
        int at = html.IndexOf(opener, start, System.StringComparison.Ordinal);
        if (at < 0)
        {
            return false;
        }
        start = at + opener.Length;
    }

    int end = html.IndexOf(closer, start, System.StringComparison.Ordinal);
    if (end < 0 || end > limit)
    {
        return false;
    }

    value = html.Substring(start, end - start);
    cursor = end;
    return true;
}
// Loads the saved page and, for every <tr> under a <tbody>, writes five lines to the response:
// link href, link text, sale price, stock cell text, unit price.
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.Load(Server.MapPath("~/test.txt"));
HtmlNodeCollection nodes = htmlDoc.DocumentNode.SelectNodes(@"//tbody//tr");
// SelectNodes yields null (not an empty collection) when the XPath matches nothing,
// so guard before iterating.
if (nodes != null)
{
    foreach (HtmlNode node in nodes)
    {
        // Look each cell up once; any of these is null when the row lacks the expected layout.
        HtmlNode linkNode = node.SelectSingleNode(@"td[1]/a");
        HtmlNode priceNode = node.SelectSingleNode(@"td[2]/span");
        HtmlNode stockNode = node.SelectSingleNode(@"td[3]");
        HtmlNode unitPriceNode = node.SelectSingleNode(@"td[4]/span");
        if (linkNode == null || priceNode == null || stockNode == null || unitPriceNode == null)
        {
            // Skip rows that are not product rows instead of failing with a NullReferenceException.
            continue;
        }

        // GetAttributeValue falls back to the default when the link has no href attribute.
        Response.Write(linkNode.GetAttributeValue("href", string.Empty) + "<br/>");
        Response.Write(linkNode.InnerText + "<br/>");
        Response.Write(priceNode.InnerText + "<br/>");
        Response.Write(stockNode.InnerText + "<br/>");
        Response.Write(unitPriceNode.InnerText + "<br/>");
    }
}
// Loads the saved page and, for every <tr> under a <tbody>, writes five lines to the response:
// link href, link text, sale price, stock cell text, unit price.
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.Load(Server.MapPath("~/test.txt"));
HtmlNodeCollection nodes = htmlDoc.DocumentNode.SelectNodes(@"//tbody//tr");
// SelectNodes yields null (not an empty collection) when the XPath matches nothing,
// so guard before iterating.
if (nodes != null)
{
    foreach (HtmlNode node in nodes)
    {
        // Look each cell up once; any of these is null when the row lacks the expected layout.
        HtmlNode linkNode = node.SelectSingleNode(@"td[1]/a");
        HtmlNode priceNode = node.SelectSingleNode(@"td[2]/span");
        HtmlNode stockNode = node.SelectSingleNode(@"td[3]");
        HtmlNode unitPriceNode = node.SelectSingleNode(@"td[4]/span");
        if (linkNode == null || priceNode == null || stockNode == null || unitPriceNode == null)
        {
            // Skip rows that are not product rows instead of failing with a NullReferenceException.
            continue;
        }

        // GetAttributeValue falls back to the default when the link has no href attribute.
        Response.Write(linkNode.GetAttributeValue("href", string.Empty) + "<br/>");
        Response.Write(linkNode.InnerText + "<br/>");
        Response.Write(priceNode.InnerText + "<br/>");
        Response.Write(stockNode.InnerText + "<br/>");
        Response.Write(unitPriceNode.InnerText + "<br/>");
    }
}
// Pattern for one product row, assembled from four pieces (the resulting string is what gets matched):
//   1. a <td> whose link supplies group 2 (href) and group 3 (link text); group 1 is the quote character.
//      The lookbehind requires a preceding <div ...table-lists...> opening tag.
//   2. group 4: content of the next span whose tag mentions "num"
//   3. group 5: text content of the next <td>
//   4. group 6: content of the following span whose tag mentions "num"
string pattern =
    @"(?im)(?<=<div[^>]*?table-lists[^>]*?>(?:(?!</?div>)[\s\S])*)<td>\s*<a[^>]+href=(['""])([^'""]+)[^>]+>([^<]+)</a>[\s\S]*?"
    + @"<span[^>]+num[^>]*>\s*([^<]+)\s*</span>[\s\S]*?"
    + @"<td[^>]*>\s*([^<]+)\s*</td>[\s\S]*?"
    + @"<span[^>]+num[^>]*>\s*([^<]+)\s*</span>";

// Read the document (decoded as gb2312).
string tempStr = File.ReadAllText(@"C:\Users\dell\Desktop\Test.txt", Encoding.GetEncoding("gb2312"));

// Walk every match and pull the individual fields out of its groups.
foreach (Match rowMatch in Regex.Matches(tempStr, pattern))
{
    GroupCollection fields = rowMatch.Groups;

    string matchedText = rowMatch.Value;
    string linkHref = fields[2].Value;      // e.g. /commodity/getCommodity.htm?commodityId=555204&commodityTypeForUIURL=yuanbao
    string productName = fields[3].Value;   // e.g. 【乐清人】100热血传奇元宝=104.00元
    string salePrice = fields[4].Value;     // e.g. 104.00
    string productCount = fields[5].Value;  // e.g. 10件
    string unitPrice = fields[6].Value;     // e.g. 1.0400
}