110,538
社区成员
发帖
与我相关
我的任务
分享
<div class="kqkj_l" id="kj_content">
<div class="qh_kj_h">
<div class="qs_kj_nane">江西11选5第2014022341期开奖结果</div>
</div>
<div class="dqkj_bqxl">
<div class="dqkj_time" style="font-size:10pt;">开奖时间:2014-02-23 15:50:00</div>
<div class="ljtz_btn"><input type="button" value="立即投注" onclick="go_tz();"/></div>
</div>
<div class="dqkj_kjhm">
<div class="kjhm">开奖号码:</div>
<div class="hq_bg">08</div>
<div class="hq_bg">01</div>
<div class="hq_bg">11</div>
<div class="hq_bg">02</div>
<div class="hq_bg">07</div>
</div>
</div>
// Load the sample lottery-result markup into an off-screen WebBrowser control so it
// can be queried through the HTML DOM once the document has finished loading.
var web = new WebBrowser();
web.DocumentText = "<div class=\"kqkj_l\" id=\"kj_content\"><div class=\"qh_kj_h\"><div class=\"qs_kj_nane\">江西11选5第2014022341期开奖结果</div></div><div class=\"dqkj_bqxl\"><div class=\"dqkj_time\" style=\"font-size:10pt;\">开奖时间:2014-02-23 15:50:00</div><div class=\"ljtz_btn\"><input type=\"button\" value=\"立即投注\" onclick=\"go_tz();\"/></div></div><div class=\"dqkj_kjhm\"><div class=\"kjhm\">开奖号码:</div><div class=\"hq_bg\">08</div><div class=\"hq_bg\">01</div><div class=\"hq_bg\">11</div><div class=\"hq_bg\">02</div><div class=\"hq_bg\">07</div></div></div>";
// Method-group conversion creates the same WebBrowserDocumentCompletedEventHandler delegate.
web.DocumentCompleted += web_DocumentCompleted;
/// <summary>
/// DocumentCompleted handler: reads the draw number, draw time and winning numbers
/// from the element with id "kj_content" in the loaded document.
/// </summary>
/// <param name="sender">Expected to be the <see cref="WebBrowser"/> that raised the event.</param>
/// <param name="e">Event data (not used).</param>
/// <remarks>
/// The extracted values are only held in locals; nothing is returned or stored.
/// The handler returns silently when the sender is not a WebBrowser, when no document
/// is available, or when the expected markup is missing, instead of throwing a
/// NullReferenceException / ArgumentOutOfRangeException.
/// </remarks>
void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    WebBrowser browser = sender as WebBrowser;
    if (browser == null || browser.Document == null)
    {
        return;
    }

    // GetElementById returns null when no element carries the requested id.
    HtmlElement container = browser.Document.GetElementById("kj_content");
    if (container == null)
    {
        return;
    }

    // All descendant <div> elements in document order. For the sample markup assigned
    // to DocumentText: [1] = title (qs_kj_nane), [3] = time (dqkj_time),
    // [5] = numbers container (dqkj_kjhm).
    HtmlElementCollection elements = container.GetElementsByTagName("div");
    if (elements.Count < 6)
    {
        return;
    }

    // InnerText may be null for an element without text; normalize to empty.
    string titleText = elements[1].InnerText ?? string.Empty;
    string numbersText = elements[5].InnerText ?? string.Empty;

    // Expected values for the sample markup:
    //   data: 2014022341
    //   time: "开奖时间:2014-02-23 15:50:00" (the "draw time" label plus the timestamp)
    //   nums: 08,01,11,02,07

    // RightToLeft makes the first match the right-most run of digits, i.e. the draw
    // number rather than the "11" / "5" in the lottery name.
    string data = new Regex(@"\d+", RegexOptions.RightToLeft).Match(titleText).Value;
    string time = elements[3].InnerText;
    // Assumes InnerText puts each child <div> on its own CRLF-separated line - TODO confirm
    // for the hosting browser version. The first line is the label, so it is skipped.
    string nums = string.Join(",", numbersText.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries).Skip(1));
}
/// <summary>
/// Strips HTML markup from a string and decodes a small set of character entities,
/// returning the remaining plain text.
/// </summary>
/// <param name="HTML">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The text with script blocks, tags, comment delimiters, numeric entities, any
/// remaining '&lt;' / '&gt;' characters and CRLF sequences removed. Returns an empty
/// string when <paramref name="HTML"/> is null or empty.
/// </returns>
public static string ClearHTMLTags(string HTML)
{
    if (string.IsNullOrEmpty(HTML))
    {
        return string.Empty;
    }

    // patterns[i] is replaced by replacements[i]; the two arrays must stay in step.
    // Order matters: tags are removed before entities are decoded.
    string[] patterns =
    {
        @"<script[^>]*?>.*?</script>",
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };
    string[] replacements =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xa1", // inverted exclamation mark, code point 161
        "\xa2", // cent sign, code point 162
        "\xa3", // pound sign, code point 163
        "\xa9", // copyright sign, code point 169
        "",
        "\r\n",
        ""
    };

    const RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnoreCase;

    string s = HTML;
    for (int i = 0; i < patterns.Length; i++)
    {
        // The static overload avoids allocating a new Regex object per pattern per call.
        s = Regex.Replace(s, patterns[i], replacements[i], options);
    }

    // string.Replace returns a new string; the result has to be assigned back,
    // otherwise these three clean-up steps have no effect.
    s = s.Replace("<", "");
    s = s.Replace(">", "");
    s = s.Replace("\r\n", "");
    return s;
}
// Sample lottery-result markup, assembled from four fragments into a single string.
string s = string.Concat(
    "<div class=\"kqkj_l\" id=\"kj_content\"><div class=\"qh_kj_h\"><div class=\"qs_kj_nane\">江西11选5第2014022341期开奖结果</div></div><div class=\"dqkj_bqxl\">",
    "<div class=\"dqkj_time\" style=\"font-size:10pt;\">开奖时间:2014-02-23 15:50:00</div><div class=\"ljtz_btn\"><input type=\"button\" value=\"立即投注\" onclick=\"go_tz();\"/></div></div>",
    "<div class=\"dqkj_kjhm\"><div class=\"kjhm\">开奖号码:</div><div class=\"hq_bg\">08</div><div class=\"hq_bg\">01</div><div class=\"hq_bg\">11</div> <div class=\"hq_bg\">02</div>",
    "<div class=\"hq_bg\">07</div></div> </div>");

// Run the markup through ClearHTMLTags and keep the text it returns.
string ss = ClearHTMLTags(s);
// Read the saved page, decoding it as GB2312.
Encoding gb2312 = Encoding.GetEncoding("GB2312");
string sInput = File.ReadAllText(@"C:\Users\myx\Desktop\Test.txt", gb2312);

// One expression that walks from the kj_content container through the title, time and
// numbers <div>s. Named groups: "date" (digits before 期 in the title), "time"
// (text of the dqkj_time div) and "code" (one capture per hq_bg div).
string pattern = @"<div[^>]*?id=(['""]?)kj_content\1[^>]*?>[\s\S]*?<div[^>]*?class=(['""]?)qs_kj_nane\2[^>]*?>[^<>]*?(?<date>\d+)期[^<>]*?</div>[\s\S]*?<div[^>]*?class=(['""]?)dqkj_time\3[^>]*?>(?<time>[^>]*?)</div>[\s\S]*?<div[^>]*?class=(['""]?)dqkj_kjhm\4[^>]*?>[\s\S]*?(<div[^>]*?class=""hq_bg""\s*?>(?<code>\d+)</div>\s*?)+</div>";
Match m = new Regex(pattern).Match(sInput);

Group dateGroup = m.Groups["date"];
Group timeGroup = m.Groups["time"];
string date = dateGroup.Value; // sample page: 2014022341
string time = timeGroup.Value; // sample page: "开奖时间:2014-02-23 15:50:00" (label plus timestamp)

// "code" sits inside a repeated group, so every repetition is kept in Captures.
IEnumerable<string> codeValues =
    from Capture capture in m.Groups["code"].Captures
    select capture.Value;
string coed = string.Join(",", codeValues); // sample page: 08,01,11,02,07