62,242
社区成员




// Inside a verbatim (@"...") string a double quote is written as "" - a backslash
// does not escape it, so the earlier \" and bare " ended the literal early.
// The ^...$ anchors are dropped and \s* is placed between the tags because each
// entry in the markup is spread over several lines; RegexOptions.Multiline only
// changes what ^ and $ mean, it does not let a one-line pattern span line breaks.
Regex aa = new Regex(@"<div\s*class=""name"">\s*<div>(?<title>[^<>]+)</div>\s*<a\s*href=""?(?<url>[^""\s]+)""?>更多\s*</a>", RegexOptions.IgnoreCase);
MatchCollection matchs = aa.Matches(sHtml);
@"<div\s*class=""name"">\s*<div>(?<title>[^<>]+)</div>\s*<a\s*href=""?(?<url>[^""\s]+)""?>更多\s*</a>"
/*<div class="name">
<div>高峰大站车</div>
<a href="/city-type-lines/59.html">更多</a>
<div class="name">
<div>高峰线路</div>
<a href="/city-type-lines/60.html">更多</a>
<div class="name">
<div>假日线</div>
<a href="/city-type-lines/61.html">更多</a>
<div class="name">
<div>接送线</div>
<a href="/city-type-lines/62.html">更多</a>
<div class="name">
<div>快速公交系列</div>
<a href="/city-type-lines/63.html">更多</a>
<div class="name">
<div>快速选择</div>
<a href="/city-type-lines/64.html">更多</a>
<div class="name">
<div>旅游线</div>
<a href="/city-type-lines/65.html">更多</a>
<div class="name">
<div>市区线路</div>
<a href="/city-type-lines/66.html">更多</a>
<div class="name">
<div>小区巴士</div>
<a href="/city-type-lines/67.html">更多</a>
<div class="name">
<div>学校线</div>
<a href="/city-type-lines/68.html">更多</a>
<div class="name">
<div>夜间线路</div>
<a href="/city-type-lines/69.html">更多</a>
<div class="name">
<div>准快速公交线路</div>
<a href="/city-type-lines/70.html">更多</a>
*/
//12个匹配结果
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace web
{
    /// <summary>
    /// Code-behind for the test page: downloads the 8684 Hangzhou page and writes
    /// the number of category "更多" (more) links found in it to the response.
    /// </summary>
    public partial class test : System.Web.UI.Page
    {
        /// <summary>
        /// Matches one category entry of the form
        /// <c>&lt;div class="name"&gt; &lt;div&gt;TITLE&lt;/div&gt; &lt;a href="URL"&gt;更多&lt;/a&gt;</c>,
        /// capturing <c>title</c> and <c>url</c>.
        /// </summary>
        /// <remarks>
        /// The three tags of an entry sit on separate lines in the markup, so the
        /// pattern uses <c>\s*</c> between them instead of anchoring a single line
        /// with <c>^</c>/<c>$</c>. Negated character classes are used instead of
        /// <c>.+</c> so a capture cannot run past its own tag or attribute.
        /// </remarks>
        private static readonly Regex CategoryLinkPattern = new Regex(
            @"<div\s*class=""name"">\s*<div>(?<title>[^<>]+)</div>\s*<a\s*href=""?(?<url>[^""\s]+)""?>更多\s*</a>",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// On the initial (non-postback) request, fetches the page and writes the
        /// match count to the response.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            if (!IsPostBack)
            {
                // Goal: find every entry that looks like
                //   <div>高峰大站车</div><a href="/city-type-lines/59.html">更多</a>
                string pageSource = GetHTML("http://www.8684.com/?city=hangzhou", "utf-8");
                MatchCollection categoryLinks = CategoryLinkPattern.Matches(pageSource);
                Response.Write(categoryLinks.Count);
            }
        }

        /// <summary>
        /// Downloads a web page and returns its source as text.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="encoding">Name of the encoding used to decode the downloaded bytes, e.g. "utf-8".</param>
        /// <returns>The downloaded bytes decoded with the named encoding.</returns>
        public string GetHTML(string url, string encoding)
        {
            // WebClient is IDisposable; release it as soon as the download finishes.
            using (WebClient client = new WebClient())
            {
                byte[] rawBytes = client.DownloadData(url);
                return Encoding.GetEncoding(encoding).GetString(rawBytes);
            }
        }
    }
}
//少个" - -!
@"^<div\s*class=""name"">\s*<div>(?<title>[^<>]+)</div>\s*<a\s*href=""?(?<url>[^""\s]+)""?>更多\s*</a>\s*$"
//1加了@之后转义"要用""
//2匹配多行不是把正则写成多行而是让正则匹配多行- -!
//3匹配空格要用\s
//4贪婪匹配慎重 .+像这种很容易造成匹配结果不准确
@"^<div\s*class=""name"">\s*<div>(?<title>[^<>]+)</div>\s*<a\s*href=""?(?<url>[^"\s]+)""?>更多\s*</a>\s*$"
改为@"^<div class=""name"">\s*<div>(?<title>.+)</div>\s*<a href=""(?<url>.+)"">更多</a>$"试试
或者
// Inside a verbatim (@"...") string a double quote is written as ""; the bare
// quotes around href previously ended the literal early.
// \s* replaces the literal line breaks and spaces so the pattern does not depend
// on the exact newline style or indentation between the tags.
string pstr = @"<div class=""name"">\s*<div>(?<title>[^<>]+)</div>\s*<a href=""(?<url>[^""]+)"">更多\s*</a>";