62,244
社区成员




// Extracts feed entries from the text that follows the "item:" marker in str.
// Each match must contain the seven fields category, cLink, title, link, media,
// author and pubDate in exactly this order, written as name:"value" and separated
// by a comma plus whitespace; the values are exposed as named groups.
System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(@"(?is)category:""(?<category>[^""]*?)"",\s+cLink:""(?<cLink>[^""]*?)"",\s+title:""(?<title>[^""]*?)"",\s+link:""(?<link>[^""]*?)"",\s+media:""(?<media>[^""]*?)"",\s+author:""(?<author>[^""]*?)"",\s+pubDate:""(?<pubDate>[^""]*?)""", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
// Look the marker up once. IndexOf returns -1 when it is absent, and handing that
// to Substring would throw ArgumentOutOfRangeException.
int itemStart = str.IndexOf("item:", System.StringComparison.Ordinal);
// Without an "item:" section there is nothing to parse, so search an empty string
// and leave m as an empty collection.
System.Text.RegularExpressions.MatchCollection m = reg.Matches(itemStart >= 0 ? str.Substring(itemStart) : string.Empty);
<!--[294,57,15] published at 2010-09-27 10:01:49 from #187 by system-->
var sinaRss = {pubDate:"2010-09-25 6:29", link:"",
item:[
{
category:"美 巡 赛",
cLink:"http://golf.sina.com.cn/pgatour.html",
title:"图文-巡回锦标赛第四轮 福瑞克夺冠后兴奋不已",
link:"http://sports.sina.com.cn/golf/p/2010-09-27/09585221946.shtml",
media:"新浪体育讯",
author:"",
pubDate:"2010/09/27 9:58"
},
{
category:"美 巡 赛",
cLink:"http://golf.sina.com.cn/pgatour.html",
title:"图文-巡回锦标赛第四轮 福瑞克与妻子分享喜悦",
link:"http://sports.sina.com.cn/golf/p/2010-09-27/09575221944.shtml",
media:"新浪体育讯",
author:"",
pubDate:"2010/09/27 9:57"
},
{
category:"美 巡 赛",
cLink:"http://golf.sina.com.cn/pgatour.html",
title:"图文-巡回锦标赛第四轮 福瑞克紧紧拥抱爱妻",
link:"http://sports.sina.com.cn/golf/p/2010-09-27/09565221943.shtml",
media:"新浪体育讯",
author:"",
pubDate:"2010/09/27 9:56"
},
// Extracts feed entries from the text that follows the "item:" marker in str.
// Each match must contain the seven fields category, cLink, title, link, media,
// author and pubDate in exactly this order, written as name:"value" and separated
// by a comma plus whitespace; the values are exposed as named groups.
// pubDate is the last field of an entry and is followed directly by the closing
// brace, so the pattern must not demand a trailing comma after it; requiring one
// made the expression match nothing.
System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(@"(?is)category:""(?<category>[^""]*?)"",\s+cLink:""(?<cLink>[^""]*?)"",\s+title:""(?<title>[^""]*?)"",\s+link:""(?<link>[^""]*?)"",\s+media:""(?<media>[^""]*?)"",\s+author:""(?<author>[^""]*?)"",\s+pubDate:""(?<pubDate>[^""]*?)""", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
// Look the marker up once. IndexOf returns -1 when it is absent, and handing that
// to Substring would throw ArgumentOutOfRangeException.
int itemStart = str.IndexOf("item:", System.StringComparison.Ordinal);
// Without an "item:" section there is nothing to parse, so search an empty string
// and leave m as an empty collection.
System.Text.RegularExpressions.MatchCollection m = reg.Matches(itemStart >= 0 ? str.Substring(itemStart) : string.Empty);
/// <summary>
/// Downloads a remote news list page, keeps the section between the "jrsd_0914" and
/// "1099916" markers, reduces it with <c>StripHTML</c>, prefixes every <c>href="</c>
/// with <c>Default.aspx?key=http://info.secu.hc360.com</c> and writes the result to
/// the response.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
/// <exception cref="InvalidOperationException">
/// The downloaded page does not contain the expected markers in the expected order.
/// </exception>
protected void Page_Load(object sender, EventArgs e)
{
    const string StartMarker = "jrsd_0914";
    const string EndMarker = "1099916";
    // Characters skipped counting from the first character of StartMarker;
    // presumably the remainder of the tag that carries the marker - TODO confirm.
    const int StartOffset = 37;

    // Address of the page to fetch (an earlier version used "http://blog.hnce.net").
    // NOTE(review): the literal starts with a space; presumably tolerated by URI
    // parsing - confirm before normalizing it.
    string strurl = " http://info.secu.hc360.com/list/news.shtml";

    byte[] pagedata;
    // WebClient is IDisposable; release it as soon as the download has finished.
    using (WebClient myWebClient = new WebClient())
    {
        // Credentials used to authenticate the request to the Internet resource.
        myWebClient.Credentials = CredentialCache.DefaultCredentials;
        // Download the resource as a raw byte array.
        pagedata = myWebClient.DownloadData(strurl);
    }

    // Pick the decoder that matches the charset of the fetched page: Encoding.Default
    // here, or Encoding.UTF8.GetString(pagedata) for a UTF-8 page.
    // NOTE(review): Encoding.Default depends on the host configuration, while Main
    // decodes the same URL as UTF-8 - confirm the page's actual charset.
    string result = Encoding.Default.GetString(pagedata);

    // Locate each marker once and validate the positions before slicing, so a layout
    // change on the remote page produces a descriptive error instead of an
    // ArgumentOutOfRangeException from Substring.
    int sectionStart = result.IndexOf(StartMarker, StringComparison.Ordinal);
    int sectionEnd = result.IndexOf(EndMarker, StringComparison.Ordinal);
    int contentStart = sectionStart + StartOffset;
    if (sectionStart < 0 || sectionEnd < contentStart)
    {
        throw new InvalidOperationException(
            "The downloaded page does not contain the expected '" + StartMarker +
            "' ... '" + EndMarker + "' section.");
    }

    result = result.Substring(contentStart, sectionEnd - contentStart);
    result = StripHTML(result);
    // Route every link through Default.aspx, passing the remote host as the key prefix.
    result = result.Replace("href=\"", "href=\"Default.aspx?key=http://info.secu.hc360.com");

    // Show the extracted content in the web page.
    Response.Write(result);
}
/// <summary>
/// Console entry point: downloads the news list page, decodes it as UTF-8 and prints
/// the HTML to standard output. A <see cref="WebException"/> raised by the download
/// is caught and its details are printed instead.
/// </summary>
public static void Main()
{
    try
    {
        byte[] pageData;
        // WebClient is IDisposable; release it as soon as the download has finished.
        using (WebClient client = new WebClient())
        {
            client.Credentials = CredentialCache.DefaultCredentials;
            // An earlier version fetched "http://blog.hnce.net" instead.
            pageData = client.DownloadData(" http://info.secu.hc360.com/list/news.shtml");
        }

        // NOTE(review): Page_Load decodes the same URL with Encoding.Default -
        // confirm which charset the page really uses.
        string pageHtml = Encoding.UTF8.GetString(pageData);
        Console.WriteLine(pageHtml);
    }
    catch (WebException webEx)
    {
        Console.Write(webEx.ToString());
    }
}
/// <summary>
/// Extracts the list block from <paramref name="strHtml"/>: the text starting at the
/// opening <c>&lt;div class="list" style="padding-top:7px;"&gt;</c> tag and ending
/// just before the first <c>&lt;/div&gt;</c> that follows it. Despite the name, no
/// tags are removed from the extracted text.
/// </summary>
/// <param name="strHtml">HTML text to search; may be null or empty.</param>
/// <returns>
/// The extracted block, including the opening tag and excluding the closing tag;
/// everything from the opening tag to the end of the input when no closing tag
/// follows; an empty string when the input is null or empty or the opening tag is
/// not present.
/// </returns>
private string StripHTML(string strHtml)
{
    const string OpeningTag = "<div class=\"list\" style=\"padding-top:7px;\">";
    const string ClosingTag = "</div>";

    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    int start = strHtml.IndexOf(OpeningTag, StringComparison.Ordinal);
    if (start < 0)
    {
        return string.Empty;
    }

    // Search for the closing tag in the original string, after the opening tag, so
    // the index is absolute. The previous code measured it relative to the already
    // shortened string and then subtracted the start offset again, which cut the
    // result short or produced a negative length.
    int end = strHtml.IndexOf(ClosingTag, start + OpeningTag.Length, StringComparison.Ordinal);
    if (end < 0)
    {
        return strHtml.Substring(start);
    }

    return strHtml.Substring(start, end - start);
}