7,763
社区成员
发帖
与我相关
我的任务
分享
/// <summary>
/// Minimal single-page crawler: downloads the document at a fixed URI and
/// extracts the <c>href</c> targets of its anchor (<c>&lt;a&gt;</c>) tags.
/// </summary>
public class Reptile
{
    // Matches the quoted href value of an anchor tag.
    //   <a\b      - the tag name must be exactly "a" (not <abbr>, <article>, ...).
    //   [^>]*?    - other attributes, but never past the end of this tag, so the
    //               href of a later, unrelated element is not picked up.
    //   \shref    - href must be its own attribute (rejects e.g. data-href).
    //   k1 / k2   - k1 is the opening quote character, k2 the link itself;
    //               \k<k1> requires the matching closing quote.
    // HTML tag and attribute names are case-insensitive, hence IgnoreCase.
    private static readonly Regex AnchorHrefPattern = new Regex(
        @"<a\b[^>]*?\shref\s*=\s*(?<k1>[""'])(?<k2>[\s\S]*?)\k<k1>",
        RegexOptions.IgnoreCase);

    // Address of the page to inspect; fixed at construction time.
    private readonly Uri uri;

    /// <summary>
    /// Creates a crawler for the given page address.
    /// </summary>
    /// <param name="url">Absolute URL of the page to download.</param>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
    /// <exception cref="UriFormatException"><paramref name="url"/> is not a valid absolute URI.</exception>
    public Reptile(string url)
    {
        this.uri = new Uri(url);
    }

    /// <summary>
    /// Downloads the page, then writes every extracted link to the console,
    /// one per line, followed by a final line holding the number of links.
    /// </summary>
    public void GetLinks()
    {
        List<string> links = this.GetLinks(this.GetHtml(this.uri));
        foreach (string link in links)
        {
            Console.WriteLine(link);
        }

        Console.WriteLine(links.Count);
    }

    /// <summary>
    /// Extracts the quoted href values of all anchor tags in the markup.
    /// </summary>
    /// <param name="html">HTML text to scan.</param>
    /// <returns>The href values in document order; empty when none are found.</returns>
    private List<string> GetLinks(string html)
    {
        return AnchorHrefPattern.Matches(html)
            .Cast<Match>()
            .Select(match => match.Groups["k2"].Value)
            .ToList();
    }

    /// <summary>
    /// Downloads the resource at <paramref name="uri"/> and decodes it as UTF-8.
    /// </summary>
    /// <param name="uri">Address of the resource to download.</param>
    /// <returns>The response body decoded as UTF-8 text.</returns>
    private string GetHtml(Uri uri)
    {
        // WebClient is IDisposable; release it as soon as the download finishes.
        using (WebClient webClient = new WebClient())
        {
            // Credentials used to authenticate the request to the Internet resource.
            webClient.Credentials = CredentialCache.DefaultCredentials;

            // Download the raw bytes of the page.
            byte[] pageBytes = webClient.DownloadData(uri);

            // The page is assumed to be UTF-8 encoded; pages served in another
            // charset will decode incorrectly.
            return Encoding.UTF8.GetString(pageBytes);
        }
    }
}
我用这个代码做了下测试