111,120
社区成员
发帖
与我相关
我的任务
/// <summary>
/// Loads a web page in a WebBrowser control and collects the "src" attribute
/// of every image found in the loaded document.
/// </summary>
/// <param name="webSite">URL of the page to load.</param>
/// <returns>
/// One "src" value per image in the document, or an empty array when no URL
/// was supplied or no document is available.
/// </returns>
private string[] GetImagePath(string webSite)
{
    if (string.IsNullOrEmpty(webSite))
    {
        return new string[0];
    }

    using (WebBrowser webInstance = new WebBrowser())
    {
        webInstance.Navigate(webSite);

        // Navigate() returns before the page has loaded, so keep pumping
        // window messages until the control reports the document complete.
        // NOTE(review): assumes the caller is on an STA (UI) thread - confirm.
        while (webInstance.ReadyState != WebBrowserReadyState.Complete)
        {
            Application.DoEvents();
        }

        HtmlDocument doc = webInstance.Document;
        if (doc == null)
        {
            return new string[0];
        }

        string[] urls = new string[doc.Images.Count];
        int index = 0;
        foreach (HtmlElement imgElement in doc.Images)
        {
            // Fill slots in order; the running index keeps writes inside the array.
            urls[index++] = imgElement.GetAttribute("src");
        }
        return urls;
    }
}
// Start loading the page in a WebBrowser control, then register a handler
// that reads the "src" attribute of every <img> element when the
// DocumentCompleted event is raised.
WebBrowser web = new WebBrowser();
web.Navigate("http://www.baidu.com");
web.DocumentCompleted += delegate(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    HtmlElementCollection images = web.Document.GetElementsByTagName("img");
    string[] sources = new string[images.Count];

    // Copy the src attribute of each image into the matching array slot.
    int position = 0;
    while (position < images.Count)
    {
        HtmlElement image = images[position];
        sources[position] = image.GetAttribute("src");
        position++;
    }
};
/// <summary>
/// Downloads the source of a web page from the given URL.
/// </summary>
/// <param name="url">URL of the page to request.</param>
/// <param name="encoding">Character encoding used to decode the response body.</param>
/// <returns>The complete response body as a string.</returns>
private string GetHtmlCode(string url, Encoding encoding)
{
    System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

    // The using blocks release the response, its stream and the reader even
    // when reading throws, so the underlying connection is always released.
    using (System.Net.WebResponse response = request.GetResponse())
    using (System.IO.Stream resStream = response.GetResponseStream())
    using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
    {
        return sr.ReadToEnd();
    }
}
// Extract image links: download the page source and pull the src value out
// of every <img> tag with a regular expression.
string html = GetHtmlCode("http://www.baidu.com", Encoding.GetEncoding("gb2312"));
Regex reg = new Regex(@"(?i)<img\b[^>]*?src=(['""]?)([^'""\s>]+)\1[^>]*>");
MatchCollection mc = reg.Matches(html);

// Buffer the links first so the text box is updated once instead of being
// re-read and re-assigned for every match.
StringBuilder links = new StringBuilder();
foreach (Match m in mc)
{
    // Group 2 is the URL captured between the optional quotes.
    links.Append(m.Groups[2].Value).Append("\n");
}

// Leave the control untouched when nothing matched.
if (links.Length > 0)
{
    richTextBox2.Text += links.ToString();
}