50,526
社区成员
发帖
与我相关
我的任务
分享
/**
 * Fetches a Google search result page for the given keywords and converts
 * every {@code h3.r > a} result link into a {@link JsoupBean}.
 *
 * <p>For each result link the title is the link text. If the element that
 * follows the link's parent contains {@code cite} elements, the bean's html
 * field is set from the cite text and its describe field from the text of the
 * element following the cite's parent; empty text is stored as {@code null}.
 * When several cites match, the last one wins.
 *
 * @param keywords search keywords; they are percent-encoded as UTF-8 before
 *                 being placed in the query string
 * @return one bean per result link, in document order (possibly empty)
 * @throws IOException if the page cannot be fetched (e.g. connect timeout)
 */
public static List<JsoupBean> jsoup(String keywords) throws IOException {
    List<JsoupBean> list = new ArrayList<JsoupBean>();
    System.out.println("开始抓取!");
    // Encode the keywords so non-ASCII text, spaces, '&' or '#' cannot corrupt
    // the query string. String.valueOf keeps the previous behaviour for a null
    // argument (the literal text "null" is searched) instead of throwing.
    String url = "https://www.google.com.hk/search?safe=strict&hl=zh-CN&tbs=qdr:m&lr=lang_zh-CN%7Clang_zh-TW&site=webhp&source=hp&q="
            + java.net.URLEncoder.encode(String.valueOf(keywords), "UTF-8"); // Google
    System.out.println("抓取关键字为:" + keywords);
    System.out.println("抓取地址为:" + url);
    Document doc = Jsoup.connect(url).timeout(6000).header("User-Agent", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2").get();
    Elements rs = doc.select("h3.r>a");
    for (Element element : rs) {
        JsoupBean bean = new JsoupBean();
        Element heading = element.parent();
        Element details = heading == null ? null : heading.nextElementSibling();
        if (details != null) {
            for (Element cite : details.select("cite")) {
                // Check the parent BEFORE dereferencing it; the original
                // called cite.parent().nextElementSibling() first.
                Element citeParent = cite.parent();
                if (citeParent == null) {
                    continue;
                }
                Element span = citeParent.nextElementSibling();
                if (span == null) {
                    continue;
                }
                String describe = span.text();
                bean.setDescribe(describe.isEmpty() ? null : describe);
                String html = cite.text();
                bean.setHtml(html.isEmpty() ? null : html);
            }
        }
        bean.setTitle(element.text());
        list.add(bean);
    }
    System.out.println("抓取结束!");
    return list;
}
/**
 * Inserts the scraped search results into the {@code t_link} table.
 *
 * <p>Scrapes results for {@code date[1]} via {@code JsoupDate.jsoup} and
 * builds one multi-row parameterized {@code insert} statement with three
 * bound values (title, html, describe) per result, then passes it to
 * {@code update}.
 *
 * @return the value returned by {@code update} for the insert, or {@code 0}
 *         when scraping failed or produced no results (in which case no SQL
 *         is executed)
 */
public int addJsoupDate() {
    // Declared as ArrayList so it can be handed to update(...) without a cast.
    ArrayList<Object> params = new ArrayList<Object>();
    StringBuilder sql = new StringBuilder(
            "insert into t_link(title,url,describer,createTime,status,platformId)values");
    try {
        List<JsoupBean> code = JsoupDate.jsoup(date[1]);
        for (JsoupBean bean : code) {
            // Separate tuples with commas instead of trimming a trailing one,
            // so an empty result can never mangle the statement text.
            if (!params.isEmpty()) {
                sql.append(',');
            }
            sql.append("(?,?,?,now(),0,4)");
            params.add(bean.getTitle());
            params.add(bean.getHtml());
            params.add(bean.getDescribe());
        }
    } catch (Exception e) {
        // Scraping failed (e.g. connect timeout): report it and do not run an
        // incomplete insert statement.
        e.printStackTrace();
        return 0;
    }
    if (params.isEmpty()) {
        // Nothing scraped: "insert ... values" with no tuples is invalid SQL.
        return 0;
    }
    System.out.println(sql);
    return this.update(sql.toString(), params);
}
开始抓取!
抓取关键字为:中国邮政
抓取地址为:https://www.google.com.hk/search?safe=strict&hl=zh-CN&tbs=qdr:m&lr=lang_zh-CN%7Clang_zh-TW&site=webhp&source=hp&q=中国邮政
java.net.SocketTimeoutException: connect timed out
at java.net.DualStackPlainSocketImpl.waitForConnect(Native Method)
at java.net.DualStackPlainSocketImpl.socketConnect(DualStackPlainSocketImpl.java:85)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:339)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:200)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:182)
at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:172)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:579)
at sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:625)
at sun.net.NetworkClient.doConnect(NetworkClient.java:175)
at sun.net.www.http.HttpClient.openServer(HttpClient.java:432)
at sun.net.www.http.HttpClient.openServer(HttpClient.java:527)
at sun.net.www.protocol.https.HttpsClient.<init>(HttpsClient.java:275)
at sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:371)
at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:191)
at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:933)
at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:177)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(HttpsURLConnectionImpl.java:153)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:563)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:540)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:227)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:216)
at com.com.test.JsoupDate.jsoup(JsoupDate.java:22)
at com.com.test.JsoupDao.addJsoupDate(JsoupDao.java:30)
at com.com.test.JsoupDao.main(JsoupDao.java:48)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)