62,074
社区成员
发帖
与我相关
我的任务
分享
using mshtml;
namespace Parser
{
    /// <summary>
    /// Form that simulates a login by driving an embedded WebBrowser control:
    /// it loads the login page, fills in the credential inputs and clicks the
    /// page's submit button.
    /// </summary>
    class Spider : Form
    {
        /// <summary>
        /// Navigates the browser control to the login page when the form loads.
        /// The URL string is a placeholder to be replaced with the real login URL.
        /// </summary>
        private void Spider_Load(object sender, EventArgs e)
        {
            webBrowser1.Navigate("登录界面的URL");
        }

        /// <summary>
        /// Ignores completion events raised for anything other than the browser's
        /// current URL, and events raised before the control reports Complete.
        /// No further work is done here; the login is triggered by button1_Click.
        /// </summary>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (e.Url.ToString() != webBrowser1.Url.ToString())
            {
                return;
            }
            if (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
            {
                return;
            }
        }

        /// <summary>
        /// Fills the username and password inputs of the loaded page and then
        /// clicks its submit button.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // Nothing to do until a page has been loaded into the control.
            if (webBrowser1.Document == null)
            {
                return;
            }

            // The DOM object is an mshtml document, not a string.
            mshtml.HTMLDocument htmlDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocument;
            if (htmlDoc == null)
            {
                return;
            }

            IHTMLElementCollection eCollection = htmlDoc.all;
            IHTMLElement submitButton = null;

            foreach (IHTMLElement element in eCollection)
            {
                // Assumes the username/password boxes and the submit button are all <input> elements.
                if (!string.Equals(element.tagName, "input", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Assumes the inputs are identified by name="username" and name="password";
                // any other distinguishing attribute (e.g. id) would work just as well.
                object attrtext = element.getAttribute("name", 0);
                if (attrtext != null)
                {
                    string inputName = attrtext.ToString();
                    if (inputName == "username")
                    {
                        element.setAttribute("value", "自己赋值");
                    }
                    if (inputName == "password")
                    {
                        element.setAttribute("value", "自己赋值");
                    }
                }

                // Assumes the submit button looks like <input type='submit' name='submit' ...>.
                // Only remember it here: clicking inside the loop could submit the form
                // before inputs that appear later in the document have been filled in.
                object attr = element.getAttribute("type", 0);
                if (submitButton == null
                    && attr != null
                    && string.Equals(attr.ToString(), "submit", StringComparison.OrdinalIgnoreCase))
                {
                    submitButton = element;
                }
            }

            // Submit once, after every credential field has been populated.
            if (submitButton != null)
            {
                submitButton.click();
            }
        }
    }
}
这就完成模拟登录啦!
登录完成(页面加载结束)之后,再执行 mshtml.HTMLDocument htmlDoc = (mshtml.HTMLDocument)webBrowser1.Document.DomDocument; 就拿到了登录后的页面文档,通过 htmlDoc.documentElement.outerHTML 即可得到登录后的源代码了!
/// <summary>
/// Convenience overload that forwards to the seven-argument PostData,
/// passing an empty string for the content type.
/// </summary>
/// <param name="strURL">Address the request is sent to.</param>
/// <param name="strArgs">Request body text.</param>
/// <param name="strReferer">Value for the Referer header.</param>
/// <param name="code">Name of the text encoding used for the body.</param>
/// <param name="method">HTTP method of the request.</param>
/// <param name="cookieContainer">Cookie container to attach to the request.</param>
/// <returns>Whatever the seven-argument overload returns.</returns>
public HttpWebResponse PostData(string strURL, string strArgs, string strReferer, string code, string method, CookieContainer cookieContainer)
{
    // No explicit content type is supplied by this overload.
    string unspecifiedContentType = string.Empty;
    HttpWebResponse forwarded = PostData(strURL, strArgs, strReferer, code, method, unspecifiedContentType, cookieContainer);
    return forwarded;
}
/// <summary>
/// Sends <paramref name="strArgs"/> as the body of an HTTP request to
/// <paramref name="strURL"/> with browser-like headers and returns the response.
/// </summary>
/// <param name="strURL">Address the request is sent to.</param>
/// <param name="strArgs">Request body text (e.g. form-encoded arguments).</param>
/// <param name="strReferer">Value for the Referer header.</param>
/// <param name="code">Name of the encoding used to turn <paramref name="strArgs"/> into bytes.</param>
/// <param name="method">HTTP method of the request; a body is always written, so it must be a method that allows one.</param>
/// <param name="contentType">Content-Type header value; null or empty selects
/// "application/x-www-form-urlencoded; charset=UTF-8".</param>
/// <param name="cookieContainer">Cookie container attached to the request. When null a new
/// container is created internally, and cookies set by the server are not visible to the caller.</param>
/// <returns>The response, which the caller must close; null when any exception occurs.</returns>
public HttpWebResponse PostData(string strURL, string strArgs, string strReferer, string code, string method, string contentType, CookieContainer cookieContainer)
{
    try
    {
        HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(strURL);
        myHttpWebRequest.AllowAutoRedirect = true;
        myHttpWebRequest.KeepAlive = true;
        myHttpWebRequest.Accept = "application/json, text/javascript, */*";
        myHttpWebRequest.Referer = strReferer;
        myHttpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.96 Safari/537.4";

        // Use the caller's content type when given; the previous code assigned the
        // literal text "contentType" instead of the parameter's value.
        myHttpWebRequest.ContentType = string.IsNullOrEmpty(contentType)
            ? "application/x-www-form-urlencoded; charset=UTF-8"
            : contentType;
        myHttpWebRequest.Method = method;

        // Let the framework advertise gzip/deflate and decode the body itself.
        // A hand-written Accept-Encoding header (which also listed the unsupported
        // "sdch") left callers reading a still-compressed response stream.
        myHttpWebRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

        myHttpWebRequest.CookieContainer = cookieContainer ?? new CookieContainer();

        byte[] postData = Encoding.GetEncoding(code).GetBytes(strArgs);
        myHttpWebRequest.ContentLength = postData.Length;

        // Dispose the request stream even if the write fails.
        using (System.IO.Stream postStream = myHttpWebRequest.GetRequestStream())
        {
            postStream.Write(postData, 0, postData.Length);
        }

        return (HttpWebResponse)myHttpWebRequest.GetResponse();
    }
    catch (Exception ex)
    {
        // Callers rely on a null return to signal failure, so keep that contract,
        // but record the error instead of silently discarding it.
        System.Diagnostics.Trace.TraceError("出错了:" + ex);
        return null;
    }
}
/// <summary>
/// Relays the posted login form to the remote login endpoint, writes the remote
/// reply to the current response, stores the login state in the session and then
/// fetches a page that requires the logged-in cookies.
/// </summary>
private void login_ajax()
{
    string username = Request.Form["username"];
    string pwd1 = Request.Form["pwd1"];
    string pwd2 = Request.Form["pwd2"];
    string imgcode = Request.Form["imgcode"];
    string f = Request.Form["f"];

    // Login endpoint.
    string LOGIN_URL = "http://mp.weixin.qq.com/cgi-bin/login?lang=zh_CN";
    // Referer sent with the login request.
    string LOGIN_REFERER = "http://mp.weixin.qq.com/cgi-bin/loginpage?t=wxm-login&lang=zh_CN";

    // Reuse the cookies stored under "login_verify_code" when present. Fall back to a
    // fresh container if the session value is missing or of another type, so the same
    // container instance is always the one that receives the login cookies.
    CookieContainer cookie = (Session["login_verify_code"] as CookieContainer) ?? new CookieContainer();

    // Build the request body. Every value is URL-encoded so that characters such as
    // '&', '=' or '+' in user input cannot corrupt or inject parameters.
    // The leading '&' of the original wire format is kept unchanged.
    string strArgs = "&username=" + Uri.EscapeDataString(username ?? string.Empty);
    strArgs += "&pwd1=" + Uri.EscapeDataString(pwd1 ?? string.Empty);
    strArgs += "&pwd2=" + Uri.EscapeDataString(pwd2 ?? string.Empty);
    strArgs += "&imgcode=" + Uri.EscapeDataString(imgcode ?? string.Empty);
    strArgs += "&f=" + Uri.EscapeDataString(f ?? string.Empty);

    HttpWebResponse http_response = pt.PostData(LOGIN_URL, strArgs, LOGIN_REFERER, CODE, MOTHED, cookie);
    if (http_response == null)
    {
        // PostData returns null when the request fails; there is nothing to relay
        // and no login state worth storing.
        return;
    }

    using (http_response)
    using (StreamReader reader = new StreamReader(http_response.GetResponseStream(), Encoding.Default))
    {
        content = reader.ReadToEnd();
    }
    Response.Write(content);

    Session["username"] = username;
    // NOTE(review): this keeps the submitted password value in session state;
    // confirm it is really needed by other pages before relying on it.
    Session["pwd"] = pwd1;
    Session["login_wx"] = cookie;

    // Page to fetch once logged in.
    string temp_url = "http://mp.weixin.qq.com/cgi-bin/userinfopage?t=wxm-setting&lang=zh_CN";
    HttpWebResponse http_response_get = pt.GetResponseByGet(temp_url, cookie);
    if (http_response_get == null)
    {
        // NOTE(review): assumes GetResponseByGet may signal failure with null, like PostData.
        return;
    }

    using (http_response_get)
    using (StreamReader reader1 = new StreamReader(http_response_get.GetResponseStream(), Encoding.Default))
    {
        // content1 holds the source of the post-login page for further processing.
        string content1 = reader1.ReadToEnd();
    }
}