110,534
社区成员
发帖
与我相关
我的任务
分享
/// <summary>
/// Smoke check: builds a keyword regex and prints whether it matches a sample anchor tag.
/// </summary>
public static void Test()
{
    string sampleAnchor = @"<a href=""/article/2010/1202/article_21642.html"" title=""四大食用油巨头被约谈:两节不得涨价"" target=""_blank"">四大食用油巨头被约谈:两节不得涨价</a>";

    // The keyword appears in the anchor's link text, so this is expected to print True.
    bool keywordFound = GetRegexLikeText("食用油巨头").IsMatch(sampleAnchor);
    Console.WriteLine(keywordFound);
}
/// <summary>
/// Builds a regex whose match value is the href of an anchor whose content
/// (the text between the opening tag and the closing &lt;/a&gt;) contains the keyword.
/// </summary>
/// <param name="keyword">Text to look for; it is escaped, so it is matched literally.</param>
/// <returns>A regex that matches only the URL portion of a qualifying anchor.</returns>
public static Regex GetRegexLikeText(string keyword)
{
    // Look-behind: an <a ...href= prefix with an optional opening quote captured as group 1.
    // The matched value itself is the run of non-quote, non-whitespace URL characters.
    string upToKeyword = @"(?<=<a[^>]+?href=(['""]?))[^'""\s]+(?=\1[^>]*>((?!</a).)*?";

    // Remainder of the look-ahead: anything up to the closing tag.
    string afterKeyword = @".*?</a>)";

    string literalKeyword = Regex.Escape(keyword);
    return new Regex(string.Concat(upToKeyword, literalKeyword, afterKeyword));
}
/// <summary>
/// Finds the anchors whose link text contains the keyword and returns their URLs.
/// </summary>
/// <remarks>
/// The pattern only recognises anchors written as <c>&lt;a href="..."</c> (href as the first
/// attribute, double-quoted) whose link text contains no nested tags. Matching ignores case.
/// </remarks>
/// <param name="htmlSource">HTML source of the page.</param>
/// <param name="keyword">
/// A single keyword. It is matched literally: regex metacharacters in it have no special meaning.
/// A null keyword is treated as an empty string.
/// </param>
/// <returns>The href values of the matching anchors; an empty array when nothing matches.</returns>
public static string[] GetUrlString(string htmlSource, string keyword)
{
    // Escape the keyword so input such as "C++" or "." is searched for as text instead of
    // being interpreted as (possibly invalid) regex syntax.
    string literalKeyword = Regex.Escape(keyword ?? string.Empty);

    string pattern = string.Format("(?<=<a\\s+href=\")[^\"]+(?=\"[^>]*>[^<]*(?:{0})+[^<]*</a>)", literalKeyword);
    MatchCollection ms = Regex.Matches(htmlSource, pattern, RegexOptions.IgnoreCase);

    string[] urlString = new string[ms.Count];
    for (int i = 0; i < urlString.Length; i++)
    {
        urlString[i] = ms[i].Value;
    }
    return urlString;
}
// Standalone usage fragment: takes the first match of the anchor pattern in yourHtml, i.e. the
// href of an anchor whose content contains the literal text "四大食用油巨头".
// NOTE(review): yourHtml is not declared anywhere in the visible source — presumably a
// placeholder for the caller's HTML string; confirm before using this line as-is.
// Match.Value is the empty string when nothing matches, so result is never null.
string result = Regex.Match(yourHtml,@"(?<=<a[^>]+?href=(['""]?))[^'""\s]+(?=\1[^>]*>((?!</a).)*?四大食用油巨头.*?</a>)").Value;
// Prints the href of the sample anchor when its title attribute starts with the target headline.
void Main()
{
    string sampleAnchor = @"<a href=""/article/2010/1202/article_21642.html"" title=""四大食用油巨头被约谈:两节不得涨价"" target=""_blank"">四大食用油巨头被约谈:两节不得涨价</a>";

    // href must be directly followed by the title attribute; the URL is captured in the "href" group.
    string anchorPattern = @"(?i)<a\shref=""(?<href>.*?)""\stitle=""四大食用油巨头.*"".*?>.*?</a>";

    Match anchor = Regex.Match(sampleAnchor, anchorPattern);
    if (!anchor.Success)
    {
        return;
    }

    // Expected output: /article/2010/1202/article_21642.html
    Console.WriteLine(anchor.Groups["href"].Value);
}