81,092
社区成员
发帖
与我相关
我的任务
分享
/**
 * Downloads the resource addressed by the {@code url} field and returns the
 * raw response body.
 *
 * <p>The request is sent with a browser-like {@code User-agent} header. The
 * response stream is closed and the connection is disconnected even when
 * reading fails part-way through.
 *
 * @return the complete response body as bytes (empty if the body is empty)
 * @throws Exception if the URL is malformed or any I/O error occurs while
 *         connecting or reading
 */
private byte[] queryData() throws Exception {
    java.net.URL connUrl = new URL(url);
    java.net.HttpURLConnection conn = (HttpURLConnection) connUrl.openConnection();
    try {
        conn.setRequestProperty("User-agent","Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; Maxthon 2.0)");
        java.io.InputStream input = conn.getInputStream();
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] data = new byte[1024];
            int length;
            // read() signals end-of-stream with -1; compare against that
            // rather than "> 0" so the loop ends only at the real EOF.
            while ((length = input.read(data)) != -1) {
                baos.write(data, 0, length);
            }
            return baos.toByteArray();
        } finally {
            // Always release the stream, including when read() throws.
            input.close();
        }
    } finally {
        // Always release the connection, including on the error path.
        conn.disconnect();
    }
}
java.io.InputStream input = conn.getInputStream();
// Excerpt from a search routine: obtain the page via the superclass helper,
// then scan it for result blocks and convert each block into a WebSearchResult.
source = super.queryUrl(searchUrl, urlEncode);
// Wrap the page text so the matcher can advance through successive matches.
PatternMatcherInput input2 = new PatternMatcherInput(source.toString());
PatternMatcher matcher = new Perl5Matcher();
// Debug dump of a slice of the input (offset 10000 up to length-1000).
// NOTE(review): the offsets are hard-coded; this looks like it would fail on
// input shorter than about 11000 characters — confirm and guard or remove.
System.out.println("============================input2.substring "+input2.substring(10000, input2.length()-1000));
// Only scan when a block pattern is configured.
if (tf.getBlockPat() != null) {
// Each iteration consumes the next block match in input2.
while (matcher.contains(input2, tf.getBlockPatPattern())) {
try {
// Capture group 1 of the block pattern is handed to pressPage.
WebSearchResult res = pressPage(matcher.getMatch().group(1));
// Null results are skipped rather than added.
if (res != null)
result.add(res);
} catch (Exception ex) {
// Any failure on one block is silently dropped and scanning moves on.
// NOTE(review): the exception is not logged, so parse failures are invisible.
continue;
}
}
}
/**
 * Builds a {@link WebSearchResult} from the text of one matched result block.
 *
 * <p>Most of the body is elided in this excerpt, so only the visible setup
 * and the final return are documented here.
 *
 * @param blockPat the text of one result block (the caller passes capture
 *        group 1 of the block pattern)
 * @return the result object created at the top of the method; whether the
 *         elided code can reassign it or set it to null is not visible here
 */
private WebSearchResult pressPage(String blockPat) {
// Debug output of the raw block being parsed.
System.out.println("======blockPat===="+ blockPat);
WebSearchResult result = new WebSearchResult();
PatternMatcher matcher = new Perl5Matcher();
// Working copy of the block text; presumably consumed by the elided code.
String value = blockPat;
// Starts as null; presumably filled in by the elided code.
String url = null;
..........................................
........................... 代码省略
return result;
}
<li class=g style="margin-bottom:8px"><h3 class="r"><a href="http://rent.soufu
n.com/chuzu/1_55444876_-1.htm" target=_blank class=l onmousedown="return clk(0,'
','','','16','','0CE0QFjAP')">北齿小区租房,两室一厅北京齿轮厂宿舍_北京租房网_搜
房网</a></h3><div class="s"><span class="f std" >11 秒前</span> - <b>...</b> 中
国旅游学院附中,八里庄第三小学,北京市朝阳区育人学校幼儿园:勘测设计院幼 <b>...</b
><br><span class=f><cite>rent.soufun.com/chuzu/1_55444876_-1.htm</cite><span cla
ss=gl></span></span></div>
<li class=g style="margin-bottom:8px"><h3 class="r"><a
href="http://rent.soufun.com/chuzu/1_55444889_-1.htm" target=_blank class=l onm
ousedown="return clk(0,'','','','17','','0CE8QFjAQ')">东方瑞景租房,一室一厅出租
长安街附近东方公寓房屋_北京租房网_搜房网</a></h3><div class="s"><span class="f s
td" >58 秒前</span> - <b>...</b> 周边配套:<em>大学</em>:华夏管理学院、朝阳区职
工<em>大学</em>中小学:陈经纶中学、芳草地小学 <b>...</b><br><span class=f><cite>
rent.soufun.com/chuzu/1_55444889_-1.htm</cite><span class=gl></span></span></div
>
<li class=g id=mbb18><h3 class="r"><a href="http://newhouse.wuhu.soufun.com/201
0-11-05/4000823.htm" target=_blank class=l onmousedown="return clk(0,'','','','1
8','','0CFEQFjAR')">花开收官时最美成熟季四期臻品小高层即将推出-芜湖新房网-搜房网
</a></h3><div class="s"><span class="f std" >47 秒前</span> - 物业地址弋江区九华
南路800号(安徽师范<em>大学</em>南校区对面). 交通状况可乘坐16、18、45 <b>...</b
><br><span class=f><cite>newhouse.wuhu.soufun.com/2010-11-05/4000823.htm</cite><
span class=gl></span></span></div><div class=mbl><div class=bl><span class=ch id
=mbl18 onclick="google.x(this)" style="display:inline-block"><div class=mbi></di
v><a href=# onclick="return false" class=mblink>显示来自 soufun.com·的更多搜索
结果</a></span></div></div><div id=mbf18><span></span></div>
<li class=g style="m
argin-bottom:8px"><h3 class="r"><a href="http://house.focus.cn/news/2010-11-05/1
093022.html" target=_blank class=l onmousedown="return clk(0,'','','','19','','0
CFQQFjAS')">十年城北区盛放抢最后的新牌坊- 新闻中心- 搜狐焦点网</a></h3><div clas
s="s"><span class="f std" >38 秒前</span> - <b>...</b> 在上半年销售面积100134平
米,销售金额600798157元,两项指标双双进入<em>重庆</em>前十。 <b>...</b> 2010-11-
05二次调控中高端别墅表现出众客群稳定供需两; 2010-11-04<em>大学</em>城投资 <b>...
</b><br><span class=f><cite>house.focus.cn/news/2010-11-05/1093022.html</cite><s
pan class=gl></span></span></div>
// Request the search page repeatedly, replaying any cookie handed out by a
// 302 response, until the server answers 200 OK. Redirects are not followed
// automatically so that the Set-Cookie header of the 302 can be captured.
String cookie = "";
// Cap the number of requests so a server that never answers 200 (for example
// a persistent 403/503, or an endless redirect) cannot spin this loop forever.
for (int attempt = 0; ; attempt++) {
    if (attempt >= 10) {
        throw new java.io.IOException("No HTTP 200 response after " + attempt + " attempts");
    }
    HttpURLConnection conn = (HttpURLConnection) new URL("http://www.google.com.hk/search?q=%E5%A6%87%E5%A5%B3&hl=zh-CN").openConnection();
    try {
        if (cookie.length() != 0) {
            conn.setRequestProperty("Cookie", cookie);
        }
        conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0)");
        conn.setInstanceFollowRedirects(false);
        int code = conn.getResponseCode();
        if (code == HttpURLConnection.HTTP_OK) {
            break;
        }
        if (code == HttpURLConnection.HTTP_MOVED_TEMP) {
            // A redirect without a Set-Cookie header yields null; skip it
            // instead of appending the literal text "null;" to the cookie.
            String setCookie = conn.getHeaderField("Set-Cookie");
            if (setCookie != null) {
                cookie += setCookie + ";";
            }
        }
    } finally {
        // The response body is never read here, so release the connection.
        conn.disconnect();
    }
}