62,634
社区成员




package com.ksource.spider.netSpider;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Minimal jsoup-based fetcher: downloads a single start page and prints the
 * parsed document to standard output.
 */
public class StartS {

    /** Page fetched by {@link #main(String[])}. */
    private static final String START_URL =
            "http://www.weibo.com/?c=spr_sinamkt_buy_srwj1_weibo_t111";

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.100 Safari/537.36";

    /** Value passed to jsoup's {@code timeout(...)} for each request. */
    private static final int TIMEOUT_MILLIS = 12000;

    /** Queue of links; nothing in this class adds to it yet. */
    private static final Queue<String> linkQueue = new LinkedList<String>();

    /**
     * Fetches {@link #START_URL}, parses the response and prints the resulting
     * document followed by a separator line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            Response response = executeLink(START_URL);
            Document document = response.parse();
            System.out.println(document.toString());
            System.out.println("--------------------------------------------------");
        } catch (IOException e) {
            // Report which URL failed before dumping the trace, so the output is actionable.
            System.err.println("Failed to fetch or parse " + START_URL + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Executes an HTTP request against the given address using the shared
     * User-Agent and timeout, ignoring the response content type and
     * following redirects.
     *
     * @param href address to request
     * @return the raw jsoup response
     * @throws IOException if the request fails
     */
    private static Response executeLink(String href) throws IOException {
        return Jsoup.connect(href)
                .ignoreContentType(true)
                .userAgent(USER_AGENT)
                .timeout(TIMEOUT_MILLIS)
                .followRedirects(true)
                .execute();
    }
}
FM.view({"ns":"pl.content.homeFeed.index","domid":"Pl_Official_MyProfileFeed__29","css":["style/css/module/list/comb_WB_feed_profile.css?version=aa44b85252d881b4"],"js":"page/js/pl/content/homeFeed/index.js?version=f3a6ca617210d1fb","html":" <div class=\"WB_feed WB_feed_v3 WB_feed_v4\" pageNum=\"\" node-type='feed_list' module-type=\"feed\">\r\n <div style=\"position:relative;\" node-type=\"feedconfig\" data-queryfix=is_hot=1>\r\n <div style=\"position:absolute;top:-110px;left:0;width:0;height:0;\" id=\"feedtop\" name=\"feedtop\"><\/div>\r\n <\/div>\r\n \t \t\t \t\t \t\t \t\t \t \t<div tbinfo=\"ouid=5710586189\" action-type=\"feed_list_item\" diss-data=\"\" mid=\"4062721538071172\" class=\"WB_cardwrap WB_feed_type S_bg2 WB_feed_vipcover \">\n <div class=\"WB_feed_detail clearfix\" node-type=\"feed_content\"\n
<script charset="utf-8">FM.view({"pid":"pl_unlogin_home_hotpersoncategory","js":[],"css":[],"html":"<div class=\"WB_cardwrap S_bg2\">\n <div class=\"DSC_text_b DSC_text_b1\">\n <div class=\"WB_cardtitle_b S_line2\">\n 。。。。。。。
// Fetch the page at curl with a GET request (curl and timeout are declared outside this snippet).
Document body = Jsoup.connect(curl).timeout(timeout).get();
// Select every element carrying the CSS class "products".
Elements productsTag = body.getElementsByClass("products");
// NOTE(review): the trailing +"" looks redundant if text() already returns a String — confirm.
String text =productsTag.text()+"";
// Gets the text of the class=products tags from the curl site <html><body></body></html>
这种。还有个sina visitor system