求一个正则表达式,取出 HTML 里的值

cnremix 2014-02-23 04:52:40

<div class="kqkj_l" id="kj_content">
<div class="qh_kj_h">
<div class="qs_kj_nane">江西11选5第2014022341期开奖结果</div>
</div>
<div class="dqkj_bqxl">
<div class="dqkj_time" style="font-size:10pt;">开奖时间:2014-02-23 15:50:00</div>
<div class="ljtz_btn"><input type="button" value="立即投注" onclick="go_tz();"/></div>
</div>
<div class="dqkj_kjhm">
<div class="kjhm">开奖号码:</div>
<div class="hq_bg">08</div>
<div class="hq_bg">01</div>
<div class="hq_bg">11</div>
<div class="hq_bg">02</div>
<div class="hq_bg">07</div>
</div>
</div>


需要取出以下内容:
开奖时间:2014-02-23 15:50:00
开奖号码:08,01,11,02,07
开奖期号:2014022341
...全文
106 4 打赏 收藏 转发到动态 举报
写回复
用AI写文章
4 条回复
切换为时间正序
请发表友善的回复…
发表回复
智商余额不足 2014-02-24
  • 打赏
  • 举报
回复

// Create a WebBrowser control and load the sample markup into it as an in-memory document.
// NOTE(review): WebBrowser is a WinForms control; presumably this runs on an STA thread
// with a running message loop - confirm in the host application.
WebBrowser web = new WebBrowser();
// Attach the handler BEFORE assigning DocumentText, so the subscription is already in
// place when loading starts and the code does not depend on completion being deferred.
web.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
web.DocumentText = "<div class=\"kqkj_l\" id=\"kj_content\"><div class=\"qh_kj_h\"><div class=\"qs_kj_nane\">江西11选5第2014022341期开奖结果</div></div><div class=\"dqkj_bqxl\"><div class=\"dqkj_time\"  style=\"font-size:10pt;\">开奖时间:2014-02-23 15:50:00</div><div class=\"ljtz_btn\"><input type=\"button\" value=\"立即投注\" onclick=\"go_tz();\"/></div></div><div class=\"dqkj_kjhm\"><div class=\"kjhm\">开奖号码:</div><div class=\"hq_bg\">08</div><div class=\"hq_bg\">01</div><div class=\"hq_bg\">11</div><div class=\"hq_bg\">02</div><div class=\"hq_bg\">07</div></div></div>";

/// <summary>
/// Handles <c>DocumentCompleted</c>: reads the issue number, the draw-time text and the
/// drawn numbers from the div elements nested under the element with id "kj_content".
/// Returns without doing anything when the sender, the document, the container element
/// or the expected divs are missing.
/// </summary>
/// <param name="sender">Expected to be the WebBrowser that raised the event.</param>
/// <param name="e">Event data; not used.</param>
void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // Guard instead of dereferencing the result of "as" blindly.
    WebBrowser browser = sender as WebBrowser;
    if (browser == null || browser.Document == null)
    {
        return;
    }

    HtmlElement container = browser.Document.GetElementById("kj_content");
    if (container == null)
    {
        return;
    }

    // All div descendants of the container; indices 1, 3 and 5 are read below.
    HtmlElementCollection elements = container.GetElementsByTagName("div");
    if (elements.Count < 6)
    {
        return;
    }

    // InnerText may be null; normalize before handing it to Regex / Split.
    string titleText = elements[1].InnerText ?? string.Empty;
    string numbersText = elements[5].InnerText ?? string.Empty;

    // Expected values for the sample markup, per the original author's notes:
    //   data: 2014022341
    //   time: the draw-time label followed by 2014-02-23 15:50:00
    //   nums: 08,01,11,02,07

    // RightToLeft makes the match start from the end of the text, so the last run of digits is taken.
    string data = Regex.Match(titleText, @"\d+", RegexOptions.RightToLeft).Value;
    string time = elements[3].InnerText;
    // Split the container text into lines and drop the first entry (the label line).
    string nums = string.Join(",", numbersText.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries).Skip(1));
}
wind_cloud2011 2014-02-23
  • 打赏
  • 举报
回复

  /// <summary>
  /// Strips markup from an HTML fragment and decodes a small set of character entities.
  /// </summary>
  /// <remarks>
  /// The patterns in <c>Regexs</c> are applied in order, each replaced by the entry at the
  /// same index in <c>Replaces</c>. Afterwards every remaining "&lt;", "&gt;" and CR-LF
  /// pair is removed. Because that final pass runs last, characters produced by decoding
  /// the lt/gt entities and the line break substituted for "--&gt;" are removed as well.
  /// </remarks>
  /// <param name="HTML">The HTML text to clean.</param>
  /// <returns>The text with tags, scripts and the listed entities processed.</returns>
  /// <exception cref="System.ArgumentNullException">
  /// Thrown by the regex replacement when <paramref name="HTML"/> is null.
  /// </exception>
  public static string ClearHTMLTags(string HTML)
  {
      // Regexs[i] is replaced by Replaces[i]; the two arrays must stay the same length.
      string[] Regexs =
      {
          @"<script[^>]*?>.*?</script>",
          @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
          @"([\r\n])[\s]+",
          @"&(quot|#34);",
          @"&(amp|#38);",
          @"&(lt|#60);",
          @"&(gt|#62);",
          @"&(nbsp|#160);",
          @"&(iexcl|#161);",
          @"&(cent|#162);",
          @"&(pound|#163);",
          @"&(copy|#169);",
          @"&#(\d+);",
          @"-->",
          @"<!--.*\n"
      };

      string[] Replaces =
      {
          "",
          "",
          "",
          "\"",
          "&",
          "<",
          ">",
          " ",
          "\xa1", // chr(161)
          "\xa2", // chr(162)
          "\xa3", // chr(163)
          "\xa9", // chr(169)
          "",
          "\r\n",
          ""
      };

      string s = HTML;
      for (int i = 0; i < Regexs.Length; i++)
      {
          s = Regex.Replace(s, Regexs[i], Replaces[i], RegexOptions.Multiline | RegexOptions.IgnoreCase);
      }

      // string.Replace returns a new string; the result has to be assigned back,
      // otherwise these three cleanup steps have no effect.
      s = s.Replace("<", "");
      s = s.Replace(">", "");
      s = s.Replace("\r\n", "");
      return s;
  }

  // Sample markup from the question, assembled from four consecutive fragments.
  string s = string.Concat(
      "<div class=\"kqkj_l\" id=\"kj_content\"><div class=\"qh_kj_h\"><div class=\"qs_kj_nane\">江西11选5第2014022341期开奖结果</div></div><div class=\"dqkj_bqxl\">",
      "<div class=\"dqkj_time\"  style=\"font-size:10pt;\">开奖时间:2014-02-23 15:50:00</div><div class=\"ljtz_btn\"><input type=\"button\" value=\"立即投注\" onclick=\"go_tz();\"/></div></div>",
      "<div class=\"dqkj_kjhm\"><div class=\"kjhm\">开奖号码:</div><div class=\"hq_bg\">08</div><div class=\"hq_bg\">01</div><div class=\"hq_bg\">11</div>      <div class=\"hq_bg\">02</div>",
      "<div class=\"hq_bg\">07</div></div> </div>");

  // Run the sample through ClearHTMLTags and keep the cleaned text.
  string ss = ClearHTMLTags(s);

  • 打赏
  • 举报
回复
// Read the saved page from disk, decoding it as GB2312.
string sInput = File.ReadAllText(@"C:\Users\myx\Desktop\Test.txt", Encoding.GetEncoding("GB2312"));

// The pattern is written as one fragment per section of the markup; the fragments
// concatenate to a single expression. The numbered back-references \1..\4 each refer to
// the optional quote captured just before them, and the named groups carry the results.
string pattern =
    // container: <div ... id="kj_content" ...>
    @"<div[^>]*?id=(['""]?)kj_content\1[^>]*?>[\s\S]*?" +
    // title div: the digits directly before the issue marker go into "date"
    @"<div[^>]*?class=(['""]?)qs_kj_nane\2[^>]*?>[^<>]*?(?<date>\d+)期[^<>]*?</div>[\s\S]*?" +
    // draw-time div: its whole text goes into "time"
    @"<div[^>]*?class=(['""]?)dqkj_time\3[^>]*?>(?<time>[^>]*?)</div>[\s\S]*?" +
    // numbers container
    @"<div[^>]*?class=(['""]?)dqkj_kjhm\4[^>]*?>[\s\S]*?" +
    // one or more number divs: every repetition adds a capture to "code"
    @"(<div[^>]*?class=""hq_bg""\s*?>(?<code>\d+)</div>\s*?)+</div>";

Match m = Regex.Match(sInput, pattern);

// Expected for the sample page: 2014022341
string date = m.Groups["date"].Value;
// Expected for the sample page: the draw-time label followed by 2014-02-23 15:50:00
string time = m.Groups["time"].Value;
// Collect every capture of the repeated "code" group; expected: 08,01,11,02,07
IEnumerable<string> codeValues =
    from Capture capture in m.Groups["code"].Captures
    select capture.Value;
string coed = string.Join(",", codeValues);

110,538

社区成员

发帖
与我相关
我的任务
社区描述
.NET技术 C#
社区管理员
  • C#
  • Web++
  • by_封爱
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告

让您成为最强悍的C#开发者

试试用AI创作助手写篇文章吧