110,534
社区成员
发帖
与我相关
我的任务
分享
/// <summary>
/// Builds the form's designer-generated controls and immediately points
/// the embedded browser at the site's start page.
/// </summary>
public Form1()
{
    InitializeComponent();

    // NOTE(review): webBrowser1_DocumentCompleted is presumably wired up in the
    // designer code; nothing in this constructor subscribes it - confirm.
    webBrowser1.Navigate("http://www.xxxx.cc/");
}
// Number of the pager page that will be requested next. Pages are requested
// one at a time, each only after the previous document has finished loading.
private int nextPagerPage = 1;

// Highest pager page to request (the original loop covered pages 1 through 7).
private const int LastPagerPage = 7;

/// <summary>
/// Handles a finished page load: reads the "current page" number out of the
/// pager text of the loaded document, then asks the pager for the next page.
/// </summary>
/// <remarks>
/// The previous version issued all seven <c>__doPostBack</c> calls in one
/// synchronous loop. A postback only starts a navigation, so the document could
/// never be ready inside that loop, and every completed postback re-entered
/// this handler and restarted the loop. Here exactly one postback is issued per
/// completed document, so each page is inspected after it has actually loaded.
/// NOTE(review): this relies on the postback raising DocumentCompleted again
/// (a full page postback). If the pager updates the page via AJAX, paging will
/// stop after the first request - confirm against the real site.
/// </remarks>
/// <param name="sender">The browser control raising the event.</param>
/// <param name="e">Event data for the completed document.</param>
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // ReadyState is used instead of comparing StatusText with a localized
    // string, which only matches on a Chinese-language browser UI.
    if (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
    {
        return;
    }

    HtmlDocument document = webBrowser1.Document;
    if (document == null || document.Body == null)
    {
        return;
    }

    string body = document.Body.OuterHtml ?? string.Empty;
    string exp = " 共([\\s\\S]*?)页,当前为第([\\s\\S]*?)页,每页10条";
    Match match = Regex.Match(body, exp, RegexOptions.IgnoreCase);
    if (match.Success)
    {
        // Group 2 is the text captured between "当前为第" and "页".
        // The original snippet computes this value and does not use it further;
        // TODO: consume it (or scrape the page content) here.
        string num = match.Groups[2].Value;
    }

    if (nextPagerPage <= LastPagerPage)
    {
        string[] args = { "AspNetPager1", nextPagerPage.ToString() };

        // Advance the counter before invoking the script so that a re-entrant
        // DocumentCompleted call cannot request the same page twice.
        nextPagerPage++;
        document.InvokeScript("__doPostBack", args);
    }
}
/// <summary>
/// Runs when a document finishes loading: collects the numbered page links
/// from the "page_nav" div into <c>Pages</c>, switches the browser over to
/// <c>Flip_DocumentCompleted</c>, and starts visiting the pages via <c>Go</c>.
/// </summary>
/// <remarks>
/// <c>Pages</c>, <c>TryInt</c>, <c>session</c>, <c>Flip_DocumentCompleted</c>
/// and <c>Go</c> are members of the form declared outside this method.
/// If the loaded document has no "page_nav" div, the handler stays subscribed
/// and simply waits for a later document, instead of throwing as the earlier
/// <c>First()</c> call did.
/// </remarks>
/// <param name="sender">The browser control raising the event.</param>
/// <param name="e">Event data for the completed document.</param>
void First_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    if (this.webBrowser1.ReadyState != WebBrowserReadyState.Complete)
    {
        return;
    }

    HtmlElement navPanel = this.webBrowser1.Document.GetElementsByTagName("div")
        .OfType<HtmlElement>()
        .FirstOrDefault(x => x.GetAttribute("className") == "page_nav");
    if (navPanel == null)
    {
        // No pager in this document; keep listening for the next one.
        return;
    }

    this.webBrowser1.DocumentCompleted -= First_DocumentCompleted;

    var collected = new List<session>();
    foreach (HtmlElement li in navPanel.GetElementsByTagName("LI"))
    {
        foreach (HtmlElement a in li.GetElementsByTagName("A"))
        {
            // Only links whose text is a positive number are page links.
            int num = TryInt(a.InnerText);
            if (num <= 0)
            {
                continue;
            }

            // Skip anchors whose href is empty or not an absolute URI;
            // constructing a Uri from such a value would throw.
            Uri target;
            if (!Uri.TryCreate(a.GetAttribute("href"), UriKind.Absolute, out target))
            {
                continue;
            }

            collected.Add(new session { 页号 = num, uri = target });
        }
    }

    Pages = collected;
    this.webBrowser1.DocumentCompleted += Flip_DocumentCompleted;
    Go();
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms;
namespace WindowsFormsApplication1
{
    /// <summary>
    /// Form that reads the numbered page links from the first loaded document's
    /// "page_nav" element and then navigates the embedded browser to each of
    /// those pages in turn, one navigation per completed document.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the designer controls and subscribes the one-shot handler
        /// that inspects the first fully loaded document.
        /// </summary>
        /// <remarks>
        /// NOTE(review): nothing in this class starts the initial navigation;
        /// presumably the browser's Url is set in the designer - confirm.
        /// </remarks>
        public Form1()
        {
            InitializeComponent();
            this.webBrowser1.DocumentCompleted += First_DocumentCompleted;
        }

        /// <summary>One page link found in the pager: its number and its target.</summary>
        public class session
        {
            // Page number parsed from the link text.
            public int 页号;

            // Absolute address the link points to.
            public Uri uri;
        }

        // Pages still to be visited; Go() removes entries from the end.
        private List<session> Pages;

        /// <summary>Parses <paramref name="s"/> as an integer.</summary>
        /// <param name="s">Text to parse; may be null.</param>
        /// <returns>The parsed value, or -1 when the text is not an integer.</returns>
        private int TryInt(string s)
        {
            int parsed;
            return int.TryParse(s, out parsed) ? parsed : -1;
        }

        /// <summary>
        /// Handles the first fully loaded document: gathers the numbered links
        /// inside the element whose class is "page_nav", then hands over to
        /// <see cref="Flip_DocumentCompleted"/> and starts the first navigation.
        /// </summary>
        /// <remarks>
        /// When the document contains no "page_nav" element the handler stays
        /// subscribed and waits for a later document, instead of throwing as
        /// the earlier <c>First()</c> call did.
        /// </remarks>
        /// <param name="sender">The browser control raising the event.</param>
        /// <param name="e">Event data for the completed document.</param>
        void First_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (this.webBrowser1.ReadyState != WebBrowserReadyState.Complete)
            {
                return;
            }

            HtmlElement navPanel = this.webBrowser1.Document.All
                .OfType<HtmlElement>()
                .FirstOrDefault(x => x.GetAttribute("className") == "page_nav");
            if (navPanel == null)
            {
                // No pager in this document; keep listening for the next one.
                return;
            }

            this.webBrowser1.DocumentCompleted -= First_DocumentCompleted;

            var collected = new List<session>();
            foreach (HtmlElement li in navPanel.GetElementsByTagName("LI"))
            {
                foreach (HtmlElement a in li.GetElementsByTagName("A"))
                {
                    // Only links whose text is a positive number are page links.
                    int num = TryInt(a.InnerText);
                    if (num <= 0)
                    {
                        continue;
                    }

                    // Skip anchors whose href is empty or not an absolute URI;
                    // constructing a Uri from such a value would throw.
                    Uri target;
                    if (!Uri.TryCreate(a.GetAttribute("href"), UriKind.Absolute, out target))
                    {
                        continue;
                    }

                    collected.Add(new session { 页号 = num, uri = target });
                }
            }

            Pages = collected;
            this.webBrowser1.DocumentCompleted += Flip_DocumentCompleted;
            Go();
        }

        /// <summary>
        /// Handles every later document: once it is fully loaded, moves on to
        /// the next remaining page.
        /// </summary>
        /// <param name="sender">The browser control raising the event.</param>
        /// <param name="e">Event data for the completed document.</param>
        void Flip_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (this.webBrowser1.ReadyState == WebBrowserReadyState.Complete)
            {
                Go();
            }
        }

        /// <summary>
        /// Takes the last remaining entry off <see cref="Pages"/>, navigates to
        /// it and shows the page number and address in the window title.
        /// Does nothing when no pages remain.
        /// </summary>
        private void Go()
        {
            // Entries are removed from the end of the list, so pages are
            // visited in the reverse of the order in which they were collected.
            var index = Pages.Count - 1;
            if (index < 0)
            {
                return;
            }

            var page = Pages[index];
            Pages.RemoveAt(index);
            this.webBrowser1.Navigate(page.uri);
            this.Text = string.Format("正在加载第 {0} 页:{1}", page.页号, page.uri.ToString());
        }
    }
}
用正则,就等于是拿着鱼刺充鱼翅,根本不上档次。
只有在非常具体的个别场合才需要使用正则。有些人学了点 SQL 语句就入了迷,甚至取个机器时间、算个四则运算,也要用 ADO.NET 去调用一下 SQL 数据库。CSDN 上有些人对正则似乎也抱着这种过分的态度。对于分析网页这类简单任务,对比这个程序你就会发现,正则成事不足、败事有余,只会给你添加不少混乱的代码。