ASP.NET 如何抓取 AJAX 网页数据?最好可以详细一点!
ASP.NET 如何抓取 AJAX 网页数据?最好可以详细一点!本人是新手,什么都不懂,麻烦各位大神指点!我这里有一段抓取静态页面的代码,不知可否帮我修改一下:
/// <summary>
/// Loads the news list page at <paramref name="v_url"/>, reads every
/// <c>li</c> item under the <c>ul</c> with id <c>J_newsList</c>, and stores
/// each article whose title is not yet found by
/// <c>HBNBLL.GetModelBytype("富豪区", "专家解读", title)</c>.
/// </summary>
/// <param name="v_url">URL of the list page to scrape.</param>
/// <returns>
/// "采集成功" when the list was processed, "查询不到数据" when the list (or its
/// items) cannot be found, or "读取出错-->" followed by the exception text when
/// anything throws.
/// </returns>
public string getDataForZjjd(string v_url)
{
    try
    {
        HtmlWeb htmlWeb = new HtmlWeb();
        HtmlDocument htmlDoc = htmlWeb.Load(v_url);
        HtmlNodeCollection htmlNodeColl = htmlDoc.DocumentNode.SelectNodes("//ul[@id='J_newsList']");
        if (htmlNodeColl == null)
        {
            return "查询不到数据";
        }

        // SelectNodes returns null (not an empty collection) when nothing matches,
        // so an empty list must be handled before touching Count.
        HtmlNodeCollection liNodeColl = htmlNodeColl[0].SelectNodes("li");
        if (liNodeColl == null)
        {
            return "查询不到数据";
        }

        // Walk the list bottom-up (presumably so that older items are inserted
        // first and therefore receive the lower ids / paixu values -- confirm).
        for (int i = liNodeColl.Count - 1; i >= 0; i--)
        {
            // Re-parse the item as a stand-alone fragment so that the "//"
            // queries below are evaluated against this item only.
            HtmlNode hn = HtmlNode.CreateNode(liNodeColl[i].OuterHtml);

            HtmlNode countNode = hn.SelectSingleNode("//*[@class='jin-newsList__count']");
            HtmlNode titleNode = hn.SelectSingleNode("//*[@class='jin-newsList__title']");

            string author = ""; // the list page exposes no author
            string reledate = countNode.ChildNodes[2].InnerText; // publish time
            string pic = hn.SelectSingleNode("//*[@class='J_lazyImg']").Attributes["data-original"].Value; // cover image
            string title = titleNode.InnerText;
            string show = title; // the summary reuses the title text
            string url = hn.SelectSingleNode("a").Attributes["href"].Value; // detail page path

            // Check for an existing record BEFORE downloading the article body,
            // so already-stored articles do not cost an extra HTTP request.
            if (HBNBLL.GetModelBytype("富豪区", "专家解读", title) != null)
            {
                continue;
            }

            string content = getDataForContent("http://news.jin10.com" + url, "zjjd"); // article body
            content = tihuan(content);

            huibaoNews HBNmodel = new huibaoNews();
            HBNmodel.addtime = DateTime.Now;
            HBNmodel.zuozhe = author;
            HBNmodel.shijian = reledate;
            HBNmodel.biaotu = pic;
            HBNmodel.title = title;
            HBNmodel.jianjie = show;
            HBNmodel.neirong = content;
            HBNmodel.state = 2;
            HBNmodel.type = "专家解读";
            HBNmodel.dalei = "富豪区";

            int ct = HBNBLL.Add(HBNmodel);
            if (ct > 0)
            {
                // Use the value returned by Add as the sort order of the new row.
                HBNmodel = HBNBLL.GetModel(ct);
                HBNmodel.paixu = ct;
                HBNBLL.Update(HBNmodel);
            }
        }
        return "采集成功";
    }
    catch (Exception ex)
    {
        return "读取出错-->" + ex.ToString();
    }
}
/// <summary>
/// Downloads an article page and returns the inner HTML of its body element
/// with all anchor tags removed.
/// </summary>
/// <param name="v_url">URL of the article page.</param>
/// <param name="type">
/// Layout code that selects the XPath used to locate the body:
/// "gjtt", "wh", "gjs", "zjjd" or "gntt". Any other value yields an empty string.
/// </param>
/// <returns>
/// The cleaned HTML; "查询不到数据" when the expected element is missing; or
/// "读取出错-->" followed by the exception text when loading or parsing throws.
/// </returns>
public string getDataForContent(string v_url, string type)
{
    string content = "";
    try
    {
        HtmlDocument htmlDoc;
        if (type == "gntt")
        {
            // gntt pages are parsed from the raw response stream. The response
            // and its stream are disposed as soon as the document is loaded.
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(new Uri(v_url));
            req.Method = "GET";
            htmlDoc = new HtmlDocument();
            using (WebResponse rs = req.GetResponse())
            using (Stream rss = rs.GetResponseStream())
            {
                htmlDoc.Load(rss);
            }
        }
        else
        {
            HtmlWeb htmlWeb = new HtmlWeb();
            htmlDoc = htmlWeb.Load(v_url);
        }

        // Per-layout selectors: the outer container, an optional alternative
        // container, and an optional query evaluated on the container.
        string containerXPath = null;
        string fallbackXPath = null;
        string innerXPath = null;
        switch (type)
        {
            case "gjtt":
                containerXPath = "//div[@class='wenzhang_my_area']";
                innerXPath = "p";
                break;
            case "wh":
                containerXPath = "//div[@class='yjl_fx168_article_zhengwen']";
                // NOTE(review): a leading "//" is resolved from the document
                // root, not from the container; kept as in the original.
                innerXPath = "//div[@class='TRS_Editor']";
                break;
            case "gjs":
                containerXPath = "//div[@class='jy_fx168_gjs_content clearfix']";
                fallbackXPath = "//div[@class='yjl_fx168_article_zhengwen']";
                innerXPath = "//div[@class='TRS_Editor']";
                break;
            case "zjjd":
                containerXPath = "//div[@class='jin-news-article_content']";
                break;
            case "gntt":
                containerXPath = "//div[@class='art_contextBox']";
                break;
        }

        if (containerXPath != null)
        {
            HtmlNode container = htmlDoc.DocumentNode.SelectSingleNode(containerXPath);
            if (container == null && fallbackXPath != null)
            {
                container = htmlDoc.DocumentNode.SelectSingleNode(fallbackXPath);
            }
            if (container == null)
            {
                return "查询不到数据";
            }

            HtmlNode body = innerXPath == null ? container : container.SelectSingleNode(innerXPath);
            if (body == null)
            {
                // Container found but the expected inner element is absent.
                return "查询不到数据";
            }
            content = body.InnerHtml;
        }

        // Strip anchor tags but keep the text they wrap.
        content = Regex.Replace(content, @"<a\s*[^>]*>", "", RegexOptions.IgnoreCase);
        content = Regex.Replace(content, @"</a>", "", RegexOptions.IgnoreCase);
    }
    catch (Exception ex)
    {
        return "读取出错-->" + ex.ToString();
    }
    return content;
}
// Data-access object that supplies the replacement table read by tihuan.
tihuanziBLL thzbll = new tihuanziBLL();

/// <summary>
/// Applies every replacement pair returned by <c>thzbll.GetList("")</c> to
/// <paramref name="content"/>: each row's "yuanzi" value is replaced with the
/// same row's "replayzi" value, in row order.
/// </summary>
/// <param name="content">Text to process; null or empty input is returned unchanged.</param>
/// <returns>The text after all replacements have been applied.</returns>
public string tihuan(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return content;
    }

    DataSet ds = thzbll.GetList("");
    if (ds == null || ds.Tables.Count == 0)
    {
        return content;
    }

    DataRowCollection rows = ds.Tables[0].Rows;
    for (int i = 0; i < rows.Count; i++)
    {
        string yuanzi = rows[i]["yuanzi"].ToString();
        if (yuanzi.Length == 0)
        {
            // string.Replace throws ArgumentException for an empty search
            // string (an empty or DBNull cell), so such rows are skipped.
            continue;
        }
        content = content.Replace(yuanzi, rows[i]["replayzi"].ToString());
    }
    return content;
}
/// <summary>
/// Click handler: sets <c>state = 1</c> on every news record whose id is listed
/// (comma-separated) in the <c>hfnids</c> hidden field, then shows a
/// confirmation message. Does nothing when the field is empty.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void btnchangezc_Click(object sender, EventArgs e)
{
    string nids = this.hfnids.Value;
    if (string.IsNullOrEmpty(nids))
    {
        return;
    }

    // The hidden field is client-controlled: accept the list with or without
    // a trailing comma and ignore tokens that are not valid integers.
    string[] nidsz = nids.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
    for (int i = 0; i < nidsz.Length; i++)
    {
        int nid;
        if (!int.TryParse(nidsz[i], out nid))
        {
            continue;
        }

        huibaoNews hbnmodel = HBNBLL.GetModel(nid);
        if (hbnmodel == null)
        {
            // No record was returned for this id.
            continue;
        }
        hbnmodel.state = 1;
        HBNBLL.Update(hbnmodel);
    }
    db.Show(Page, "操作成功");
    // searchNews() was commented out in the original and remains disabled.
}
}
}