110,556
社区成员
发帖
与我相关
我的任务
分享
/// <summary>
/// Demonstrates extracting an agent's name, mobile number and company from three
/// differently structured HTML fragments. Each fragment gets its own regex that
/// walks a fixed number of tags between the fields of interest; only the first
/// match per fragment is printed to the console.
/// </summary>
private static void TestRegex05()
{
    // Fragment 1: records wrapped in <dl class="agent"> ... </dl>.
    string dlMarkup = @"<dl class=""agent"">
<dt><a target=""_blank"" href=""http://sz.esf.sina.com.cn/shop/46402/""><b>黄佳勇</b></a><img title=""用户级别: 银牌"" src=""http://ss1.res.fangyou.com/comm/images/grade/userlevel/silver_medal.png""></dt>
<dd>手机:<span class=""highlight"">13715287170</span></dd>
<dd>所属公司/门店:深圳市中联房地产企业发展有限公司 万科城秋桐分行</dd>
<dd>服务区域: <a href=""http://sz.esf.sina.com.cn/house/a5/"" target=""_blank"">龙岗</a>
<a href=""http://sz.esf.sina.com.cn/house/a5-b11/"" target=""_blank"">坂田</a>
</a>
</dd>
<dd class=""enterShop""><a target=""_blank"" href=""http://sz.esf.sina.com.cn/shop/46402/"">进入店铺</a></dd>
</dl>
<dl class=""agent"">
...
</dl>
还有很多个
<dl class=""agent"">
...
</dl>";

    // Fragment 2: records wrapped in <ul class="jjr_description"> ... </ul>.
    string ulMarkup = @"<ul class=""jjr_description"">
<li><b>姓名</b>:粟玉春</li>
<li><b>电话</b>:<span class=""word_red"">13760464817</span></li>
<li><b>服务区域</b>:罗湖区</li>
<li><b>所属公司</b>:中原地产</li>
<li><b>房源总量</b>:102套</li>
<li><a href=""/common/esf09/broker/esf_broker.php?broker_id=22416917"" target=""_blank""><img src=""/common/modules/newesf/esf08_img/entershop.gif"" alt=""进入我的店铺"" title=""进入我的店铺"" /></a></li>
</ul>
<ul class=""jjr_description"">
...
</ul>";

    // Fragment 3: records wrapped in <li class="agenttxt"> ... </li>.
    string liMarkup = @"<li class=""agenttxt"">
<div>
<span>姓名:</span><span class=""font13lai"">
<span id=""ctl00_ContentPlaceHolder1_gvUSer_ctl02_lblUserName"" style=""font-size:14px;font-weight:bold;"">庄秀贞</span></span>
<span class=""font12hei""><a href='http://13510916071.szhome.com'target=""_blank"">【进入店铺】</a></span>
<span style=""margin-left: 10px;"" class=""red12"">
<span id=""ctl00_ContentPlaceHolder1_gvUSer_ctl02_lblPhone"">13510916071</span></span></div>
<div>所属公司:<span id=""ctl00_ContentPlaceHolder1_gvUSer_ctl02_lblCompany"" class=""font13lai"">中原地产</span>
<span id=""ctl00_ContentPlaceHolder1_gvUSer_ctl02_lblBranch"" class=""font13lai"">紫薇花园分行</span></div>
<div>
服务社区:<span id=""ctl00_ContentPlaceHolder1_gvUSer_ctl02_lblCommunityName"" class=""font13lai"">紫薇花园</span>
</div>
</li>
<li class=""agenttxt"">
...
</li>
还有很多个
<li class=""agenttxt"">
...
</li>
";

    // The four arrays below are parallel: index i describes one fragment.
    string[] fragments = new string[] { dlMarkup, ulMarkup, liMarkup };

    string[] banners = new string[]
    {
        "-------------s1-------------",
        "-------------s2-------------",
        "-------------s3-------------"
    };

    // Name of the third captured group; the first pattern calls it "address",
    // the other two call it "company".
    string[] thirdGroupNames = new string[] { "address", "company", "company" };

    // Each pattern starts with the inline options (?in) and skips a fixed count
    // of "anything up to and including the next '>'" between the named groups.
    Regex[] extractors = new Regex[]
    {
        new Regex(@"(?in)<dl class=""agent"">([^>]+>){3}(?<name>[^<]+)([^>]+>){6}(?<mobile>[^<]+)([^>]+>){3}[^:]+:(?<address>[^<]+)", RegexOptions.Compiled),
        new Regex(@"(?in)<ul class=""jjr_description"">([^>]+>){3}:?(?<name>[^<]+)([^>]+>){5}(?<mobile>[^<]+)([^>]+>){9}:?(?<company>[^<]+)", RegexOptions.Compiled),
        new Regex(@"(?in)<li class=""agenttxt"">([^>]+>){5}(?<name>[^<]+)([^>]+>){8}(?<mobile>[^<]+)([^>]+>){5}(?<company>[^<]+)", RegexOptions.Compiled)
    };

    for (int i = 0; i < fragments.Length; i++)
    {
        Console.WriteLine(banners[i]);

        Match firstHit = extractors[i].Match(fragments[i]);
        if (!firstHit.Success)
        {
            // Nothing matched: only the banner is printed for this fragment.
            continue;
        }

        Console.WriteLine(firstHit.Groups["name"].Value);
        Console.WriteLine(firstHit.Groups["mobile"].Value);
        Console.WriteLine(firstHit.Groups[thirdGroupNames[i]].Value);
    }
}
-------------s1-------------
黄佳勇
13715287170
深圳市中联房地产企业发展有限公司 万科城秋桐分行
-------------s2-------------
粟玉春
13760464817
中原地产
-------------s3-------------
庄秀贞
13510916071
中原地产
"(?ins)(?<=<ul[^<>]*>)((?!</ul>).)*<li><b>姓名</b>:\s*(?<name>[^\s<>]+)</li>\s*<li><b>电话</b>:\s*<span[^<>]+>(?<tel>[^\s<>]+)\s*</span></li>((?!</ul>).)*<li><b>房源总量</b>:\s*(?<cnt>[^\s<>]+)</li>\s*"
用上面这个正则匹配之后,分别取 Groups["name"]、Groups["tel"]、Groups["cnt"],即可得到姓名、电话和房源总量。