111,126
社区成员
发帖
与我相关
我的任务
分享
/// <summary>
/// Requests <paramref name="Url"/> and returns the response body decoded as GB2312 text.
/// </summary>
/// <param name="Url">Address handed to <c>WebRequest.Create</c>.</param>
/// <returns>
/// The full response body, or <c>null</c> when <c>GetResponse</c> failed; in that case
/// the failure text has been written to the page's <c>Response</c>.
/// </returns>
public string GetHttpData(string Url)
{
    WebRequest oWebRqst = WebRequest.Create(Url);
    oWebRqst.Timeout = 50000; // WebRequest.Timeout is expressed in milliseconds

    WebResponse oWebRps = null;
    try
    {
        oWebRps = oWebRqst.GetResponse();
    }
    catch (WebException e)
    {
        // Network/protocol failure: report only the message.
        Response.Write(e.Message);
    }
    catch (Exception e)
    {
        // Anything else: report the full exception text.
        Response.Write(e.ToString());
    }

    if (oWebRps == null)
    {
        return null;
    }

    // Read outside the try/catch so read errors still propagate to the caller as before,
    // but inside using blocks so the reader and the response are released even when
    // ReadToEnd throws.
    using (oWebRps)
    using (StreamReader oStreamRd = new StreamReader(oWebRps.GetResponseStream(), Encoding.GetEncoding("GB2312")))
    {
        return oStreamRd.ReadToEnd();
    }
}
WebClient wc = new WebClient(); // client used to fetch the page
string msg; // receives the page text, or the exception message if the download fails
try
{
    byte[] byDats = wc.DownloadData(this.TextBox1.Text); // download the resource at the URL typed into TextBox1
    msg = Encoding.Default.GetString(byDats); // decode the bytes with the platform default encoding
}
catch (Exception ex)
{
    msg = ex.Message;
}
finally
{
    // WebClient is disposable; release it whether or not the download succeeded.
    wc.Dispose();
}
// IndexOf locates only the first occurrence of the keyword; call it in a loop to find them all.
// NOTE(review): the returned index is discarded here - capture it if it is needed.
msg.IndexOf("你要查询的关键字的索引");
/// <summary>
/// Click handler that runs the scrape: writes every title returned by
/// <c>GetTitle</c> to the response, each followed by a line break tag.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    foreach (string title in GetTitle())
    {
        Response.Write(title);
        Response.Write("<br>");
    }
}
/// <summary>
/// Downloads the page whose URL is in TextBox1 and extracts the news titles and
/// dates matched by the title/date pattern below.
/// </summary>
/// <returns>
/// A flat array laid out as title, date, title, date, ... in the order the matches
/// appear in the page. Empty when the download fails or nothing matches.
/// </returns>
public string[] GetInfo()
{
    string msg;
    using (WebClient wc = new WebClient())
    {
        try
        {
            byte[] byDats = wc.DownloadData(this.TextBox1.Text); // fetch the resource at the given URL
            msg = Encoding.Default.GetString(byDats); // decode with the platform default encoding
        }
        catch (Exception)
        {
            // Best effort: a failed download yields no entries. The error text is
            // deliberately not run through the pattern as if it were page content.
            return new string[0];
        }
    }

    System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(@"·\s*<a[^>]*>(?<title>[^<]*)</a>\s*<font[^>]*>(?<date>[^<]*)</font>");

    // Collect the captures directly. Joining them with ',' and '|' and splitting again
    // would break the title/date alternation whenever a capture contains one of those
    // characters, and would leave stray "\r\n" fragments in the result.
    System.Collections.Generic.List<string> entries = new System.Collections.Generic.List<string>();
    foreach (Match m in reg.Matches(msg))
    {
        entries.Add(m.Groups["title"].Value); // news title
        entries.Add(m.Groups["date"].Value);  // publication date
    }
    return entries.ToArray();
}
/// <summary>
/// Gets the article titles: the entries at even positions (0, 2, 4, ...) of the
/// array returned by <c>GetInfo</c>.
/// </summary>
/// <returns>The even-indexed entries, in their original order.</returns>
public string[] GetTitle()
{
    string[] entries = GetInfo();
    ArrayList titles = new ArrayList();
    for (int index = 0; index < entries.Length; index += 2)
    {
        titles.Add(entries[index]);
    }
    return (string[])titles.ToArray(typeof(string));
}
/// <summary>
/// Gets the article publication dates: the entries at odd positions (1, 3, 5, ...)
/// of the array returned by <c>GetInfo</c>.
/// </summary>
/// <returns>The odd-indexed entries, in their original order.</returns>
public string[] GetDate()
{
    string[] entries = GetInfo();
    ArrayList dates = new ArrayList();
    for (int index = 1; index < entries.Length; index += 2)
    {
        dates.Add(entries[index]);
    }
    return (string[])dates.ToArray(typeof(string));
}