111,126
社区成员
发帖
与我相关
我的任务
分享
// Set the regular expression used to extract links from a web page
// Pattern for pulling link targets out of HTML. The URL is captured in the
// named group "link". The trailing [^>]*> lets the match run to the end of the
// tag, so anchors that carry more attributes after href (target, class, ...)
// are matched too instead of only those where href is the last attribute.
string param = "href=\"(?<link>[^\"]*)\"[^>]*>";
// Extract the links from the page.
// stringline is the web page text that was read in beforehand.
MatchCollection mc = Regex.Matches(stringline, param, RegexOptions.IgnoreCase);
// Downloads the http://www.163.com page and writes the href value of every
// <a ...> tag found in it to the current HTTP response, one per line.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://www.163.com");
request.Timeout = 20000;                    // milliseconds to wait for the response
request.ServicePoint.ConnectionLimit = 100;
request.ReadWriteTimeout = 30000;           // milliseconds for each read/write on the stream
request.Method = "GET";

MatchCollection mc;
// The using blocks release the response (and its connection) and the reader
// on every path: normal completion, the early return, and exceptions.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
    if (response.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    using (StreamReader sr = new StreamReader(response.GetResponseStream()))
    {
        // The link target is captured in the named group "href".
        Regex reg = new Regex(@"<a.*?href=""(?<href>[^""]*)""[^>]*>", RegexOptions.IgnoreCase);
        // The whole body is read into a string first, so the matches stay
        // usable after the reader has been disposed.
        mc = reg.Matches(sr.ReadToEnd());
    }
}

// Every item in a MatchCollection is a successful match, so no Success check is needed.
foreach (Match m in mc)
{
    // The href text comes from a remote page; HTML-encode it so it cannot
    // inject markup or script into the page being generated here.
    Response.Write(System.Web.HttpUtility.HtmlEncode(m.Groups["href"].Value) + "<br/>");
}
Response.End();