.net 远程抓取页面信息
代码如下:
static void MainFunction()
{
//http://www.windowsphone.com/zh-CN/games 首页-中国
//http://www.windowsphone.com/zh-CN/apps/7ead781c-50f1-43bc-be2c-0c604da39c98 详细页
//http://www.windowsphone.com/en-US/marketplace 首页-美国
string _URL = "http://www.windowsphone.com/zh-CN/games";
string _TestStr = SendDataByPost(_URL);
}
static string SendDataByPost(string Url)
{
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
request.Method = "GET";
request.Host = "news.bjx.com.cn";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0";
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
request.Headers[HttpRequestHeader.AcceptLanguage] = "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3";
request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip, deflate";
var property = typeof(WebHeaderCollection).GetProperty("InnerCollection", System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic);
if (property != null)
{
var collection = property.GetValue(request.Headers, null) as NameValueCollection;
collection["Connection"] = "Keep-Alive";
}
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
Stream myResponseStream = response.GetResponseStream();
StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("gb2312"));
string retString = myStreamReader.ReadToEnd();
myStreamReader.Close();
myResponseStream.Close();
return retString;
}
我要抓去以上三个页面的信息,以下是采集到的信息:
html><head><meta http-equiv="REFRESH" content="0; URL=http://www.windowsphone.com/zh-CN/apps/d56d814f-4fa8-410a-87c1-aea0cede6f87"><scripttype="text/javascript">function OnBack(){}</script></head>< ml>
不明白到底怎么回事,希望高手能给我解答一下:)