110,536
社区成员
发帖
与我相关
我的任务
分享
// Finds the result grid by its id alone, so attribute order, extra attributes, or
// whitespace before '>' cannot prevent a match. Group 1 is the table's inner markup.
private static readonly Regex VesselTableRegex = new Regex(
    @"<table[^<>]*\bid\s*=\s*[""']?DataGrid1\b[^<>]*>([\s\S]*?)</table>",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Matches one row made of four consecutive <td> cells; groups 1-4 are the cell texts.
// The closing </tr> is deliberately not required so rows emitted without it still match.
private static readonly Regex VesselRowRegex = new Regex(
    @"<tr[^<>]*>\s*<td[^<>]*>([^<>]*)</td>\s*<td[^<>]*>([^<>]*)</td>\s*<td[^<>]*>([^<>]*)</td>\s*<td[^<>]*>([^<>]*)</td>",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Downloads the vessel-code page, extracts every four-cell row of the DataGrid1
/// table into a <see cref="DataTable"/> and binds it to <c>dataGridView1</c>.
/// <c>label1</c> and <c>label2</c> receive the start and end timestamps.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The download is synchronous, and network failures (e.g. <see cref="WebException"/>)
/// propagate to the caller exactly as before. Every matching row is added, including
/// the header row, because it is also made of plain <c>td</c> cells.
/// </remarks>
private void button1_Click(object sender, EventArgs e)
{
    label1.Text = DateTime.Now.ToString();
    string strURL = "http://ftp2.etedi.com/cd_qry/qry_vessel_code.aspx";

    // Holds the scraped rows.
    DataTable dtbl = new DataTable();
    dtbl.Columns.Add("Code");
    dtbl.Columns.Add("ENG_Name");
    dtbl.Columns.Add("IN_Voyage");
    dtbl.Columns.Add("OUT_Voyage");

    // Fetch the page text.
    string strSource = DownloadPage(strURL);

    // Narrow the search to the result table, then pull out its rows.
    Match table = VesselTableRegex.Match(strSource);
    if (table.Success)
    {
        foreach (Match row in VesselRowRegex.Matches(table.Groups[1].Value))
        {
            DataRow drow = dtbl.NewRow();
            // Cell text is trimmed because the page pads values with trailing whitespace.
            drow["Code"] = row.Groups[1].Value.Trim();
            drow["ENG_Name"] = row.Groups[2].Value.Trim();
            drow["IN_Voyage"] = row.Groups[3].Value.Trim();
            drow["OUT_Voyage"] = row.Groups[4].Value.Trim();
            dtbl.Rows.Add(drow);
        }
    }

    // Show the scraped data (an empty table when nothing matched).
    dataGridView1.DataSource = dtbl;
    label2.Text = DateTime.Now.ToString();
}

/// <summary>
/// Reads the whole response body of <paramref name="url"/> as text, disposing the
/// response, its stream and the reader when done.
/// </summary>
private static string DownloadPage(string url)
{
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, ResolveResponseEncoding(response.ContentType)))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Picks the text encoding named by the <c>charset=</c> parameter of a Content-Type
/// header value. Falls back to gb2312 when the header is missing, carries no charset,
/// or names an encoding the runtime does not know.
/// </summary>
private static Encoding ResolveResponseEncoding(string contentType)
{
    const string fallbackName = "gb2312";
    const string marker = "charset=";

    string name = fallbackName;
    if (!string.IsNullOrEmpty(contentType))
    {
        int at = contentType.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
        if (at >= 0)
        {
            string candidate = contentType.Substring(at + marker.Length);

            // Drop any parameters that follow the charset and any surrounding quotes.
            int end = candidate.IndexOf(';');
            if (end >= 0)
            {
                candidate = candidate.Substring(0, end);
            }
            candidate = candidate.Trim().Trim('"', '\'');

            if (candidate.Length > 0)
            {
                name = candidate;
            }
        }
    }

    try
    {
        return Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        // Unknown or malformed charset name: use the default instead of failing.
        return Encoding.GetEncoding(fallbackName);
    }
}
// Isolate the inner markup of the table whose opening tag mentions DataGrid1
// (lookbehind/lookahead keep the <table> and </table> tags out of the match).
Regex reg = new Regex(@"(?<=<table[^<>]+?DataGrid1[^<>]+>)[\s\S]+?(?=</table>)");
// Search the downloaded page text. The URL string itself never contains a table,
// so matching against it always produced an empty result.
string data = reg.Match(strSource).Value;
// One match per <tr ...> row holding exactly four plain <td> cells; each cell's text
// is recorded as a separate capture of the "key" group.
Regex reg1 = new Regex(@"<tr[^<>]+>(?:\s*<td>(?<key>[^<>]+)</td>){4}\s*</tr>");
MatchCollection mc = reg1.Matches(data);
foreach (Match m in mc)
{
    // The {4} quantifier guarantees four captures per matched row, printed in cell order.
    foreach (Capture cell in m.Groups["key"].Captures)
    {
        Console.WriteLine(cell.Value);
    }
}
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<!-- Sample page markup; presumably a captured response of qry_vessel_code.aspx (the form below posts back to that page). The DataGrid1 table holds one header row followed by two data rows, each with four plain td cells and a closing tr tag. -->
<HTML>
<HEAD>
<title>船号对照表</title>
<meta name="GENERATOR" Content="Microsoft Visual Studio .NET 7.1">
<meta name="CODE_LANGUAGE" Content="C#">
<meta name="vs_defaultClientScript" content="JavaScript">
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
</HEAD>
<body MS_POSITIONING="GridLayout" background="images\expl.gif">
<form name="Form1" method="post" action="qry_vessel_code.aspx" id="Form1">
<input type="hidden" name="__VIEWSTATE" value=" 4+Oz47Oz47Pj47Pj47Pj47Pj47Pj47Pu7DKEAuvwFgTU8mM3YUvPpMfNxh" />
<FONT face="宋体">
<TABLE id="Table1" style="Z-INDEX: 101; LEFT: 0px; POSITION: absolute; TOP: 0px" cellSpacing="0"
cellPadding="0" width="100%" border="0">
<TR>
<TD height="100">
<img id="Image1" src="images\text12.gif" alt="" border="0" style="height:100px;width:982px;" /></TD>
</TR>
<TR>
<TD style="HEIGHT: 225px" align="center" vAlign="top">
<P> </P>
<table cellspacing="0" rules="all" bordercolor="Black" border="1" id="DataGrid1" style="border-color:Black;width:502px;border-collapse:collapse;">
<tr align="Center" style="color:White;background-color:#A2C1EC;font-size:Smaller;font-weight:bold;">
<td>船号</td><td>英文船名</td><td>进口航次</td><td>出口航次</td>
</tr><tr align="Center" style="color:#003366;background-color:#DDDDFF;font-size:Smaller;">
<td>615050</td><td>105HYODONGCHEMI </td><td>1106 </td><td>1107 </td>
</tr><tr align="Center" style="color:#003366;background-color:#CCDDEE;font-size:Smaller;">
<td>615093</td><td>105HYODONGCHEMI </td><td>1110 </td><td>1111 </td>
</tr>
</table></TD>
</TR>
</TABLE>
</FONT>
</form>
</body>
</HTML>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<!-- Second sample page markup with the same layout. Here the DataGrid1 table holds the header row and a single data row that has no closing tr tag before the table ends, so a row pattern that requires the closing tr tag will not match that row. -->
<HTML>
<HEAD>
<title>船号对照表</title>
<meta name="GENERATOR" Content="Microsoft Visual Studio .NET 7.1">
<meta name="CODE_LANGUAGE" Content="C#">
<meta name="vs_defaultClientScript" content="JavaScript">
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
</HEAD>
<body MS_POSITIONING="GridLayout" background="images\expl.gif">
<form name="Form1" method="post" action="qry_vessel_code.aspx" id="Form1">
<input type="hidden" name="__VIEWSTATE" value=" 4+Oz47Oz47Pj47Pj47Pj47Pj47Pj47Pu7DKEAuvwFgTU8mM3YUvPpMfNxh" />
<FONT face="宋体">
<TABLE id="Table1" style="Z-INDEX: 101; LEFT: 0px; POSITION: absolute; TOP: 0px" cellSpacing="0"
cellPadding="0" width="100%" border="0">
<TR>
<TD height="100">
<img id="Image1" src="images\text12.gif" alt="" border="0" style="height:100px;width:982px;" /></TD>
</TR>
<TR>
<TD style="HEIGHT: 225px" align="center" vAlign="top">
<P> </P>
<table cellspacing="0" rules="all" bordercolor="Black" border="1" id="DataGrid1" style="border-color:Black;width:502px;border-collapse:collapse;">
<tr align="Center" style="color:White;background-color:#A2C1EC;font-size:Smaller;font-weight:bold;">
<td>船号</td><td>英文船名</td><td>进口航次</td><td>出口航次</td>
</tr><tr align="Center" style="color:#003366;background-color:#DDDDFF;font-size:Smaller;">
<td>? </td><td>0 </td><td> </td><td> </td>
</table></TD>
</TR>
</TABLE>
</FONT>
</form>
</body>
</HTML>