81,091
社区成员
发帖
与我相关
我的任务
分享
import java.io.IOException;
import jpcap.JpcapCaptor;
import jpcap.NetworkInterface;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Crawls the IMDb genre index, walks the paginated search results of every
 * genre and, for each movie page that exposes a trailer link, prints the
 * movie's genres, director and actors and then starts a Jpcap capture.
 *
 * All progress is reported on standard output. Pages that cannot be fetched
 * or that lack the expected markup are skipped instead of aborting the crawl.
 */
public class IDMBDownloader {
    /** Site root that relative hrefs are resolved against. */
    private static final String SITE_ROOT = "http://www.imdb.com/";
    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20100101 Firefox/22.0";
    /** Connect/read timeout for every request, in milliseconds. */
    private static final int TIMEOUT_MS = 30000;
    /** Step between the "start" offsets of consecutive search pages. */
    private static final int PAGE_STEP = 50;
    /** Exclusive upper bound for the "start" offset of a search page. */
    private static final int MAX_START = 1000;
    /** Index into JpcapCaptor.getDeviceList() of the interface to capture on. */
    private static final int CAPTURE_DEVICE_INDEX = 1;

    /**
     * Starts the crawl at the IMDb genre index.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String baseURL = "http://www.imdb.com/genre/";
        IDMBDownloader.URLprocess(baseURL);
    }

    public IDMBDownloader() {
    }

    /**
     * Fetches the genre index at {@code baseURL} and processes every genre
     * listed in its genre table.
     *
     * @param baseURL URL of the genre index page
     */
    public static void URLprocess(String baseURL) {
        Document baseDocument = fetch(baseURL);
        if (baseDocument == null) {
            // The failure has already been reported by fetch(); nothing to crawl.
            return;
        }
        System.out.println("Entry success!");

        Element genreTable = baseDocument.select("table[class^=genre-table]").first();
        if (genreTable == null) {
            System.out.println("No genre table found at " + baseURL);
            return;
        }

        for (Element heading : genreTable.select("h3")) {
            // Look inside THIS heading; selecting from the whole list would
            // return the first genre on every iteration.
            Element genreLink = heading.select("a[href]").first();
            if (genreLink == null) {
                continue;
            }
            String genre = normalizeGenre(genreLink.text());
            if (genre.length() == 0) {
                continue;
            }
            System.out.println("Genre: " + genre);
            processGenre(genre);
        }
    }

    /** Walks the paginated search results of one genre and processes each listed movie. */
    private static void processGenre(String genre) {
        for (int start = 1; start < MAX_START; start += PAGE_STEP) {
            String searchUrl = "http://www.imdb.com/search/title?at=0&genres=" + genre
                    + "&sort=moviemeter,asc&start=" + start + "&title_type=feature";
            System.out.println("Search URL: " + searchUrl);

            Document resultPage = fetch(searchUrl);
            if (resultPage == null) {
                continue;
            }
            System.out.println("Genre page entry success!");

            Element resultTable = resultPage.select("table[class=results]").first();
            if (resultTable == null) {
                // Presumably we ran past the last result page of this genre.
                System.out.println("No results table at start=" + start + "; leaving genre " + genre);
                break;
            }

            int rank = start;
            for (Element titleCell : resultTable.select("td[class=title]")) {
                Element movieLink = titleCell.select("a[href]").first();
                if (movieLink != null) {
                    processMovie(rank, movieLink.text(), movieLink.attr("href"));
                }
                rank++;
                System.out.println("-----------------------------------------");
            }
        }
    }

    /** Prints one movie's listing data and, if its page has a trailer link, its details; then starts the capture. */
    private static void processMovie(int rank, String title, String href) {
        System.out.println("Title " + rank + ":" + title);
        System.out.println("URL " + rank + ":" + href);

        Document moviePage = fetch(absoluteUrl(href));
        if (moviePage == null) {
            return;
        }

        // Only movies that expose a trailer are parsed further.
        Element trailerLink = moviePage.select("a[itemprop=trailer]").first();
        if (trailerLink == null) {
            return;
        }
        Element overview = moviePage.select("div#title-overview-widget").first();
        if (overview == null) {
            System.out.println("No overview block on movie page; skipped");
            return;
        }

        String genres = joinTexts(overview.select("span[itemprop=genre]"), '|');
        System.out.println("Genre " + rank + ":" + genres);

        Element directorBox = overview.select("div[itemprop=director]").first();
        Element directorLink = directorBox == null ? null : directorBox.select("a[href]").first();
        String director = directorLink == null ? "" : directorLink.text();
        System.out.println("Director " + rank + ":" + director);

        Element actorsBox = overview.select("div[itemprop=actors]").first();
        String actors = actorsBox == null
                ? ""
                : joinTexts(actorsBox.select("span[itemprop=name]"), ',');
        System.out.println("Actors " + rank + ":" + actors);

        captureTrailerTraffic(trailerLink.attr("href"));
    }

    /** Prepares a connection to the trailer page and runs a packet capture on a local interface. */
    private static void captureTrailerTraffic(String trailerHref) {
        // Build the connection from the href string (not from the Element,
        // whose string form is its HTML).
        // NOTE(review): as in the original code, the connection is only
        // configured here; no request is sent because get()/execute() is
        // never called. Confirm whether the trailer page should be fetched.
        Jsoup.connect(absoluteUrl(trailerHref))
                .userAgent(USER_AGENT)
                .ignoreContentType(true)
                .timeout(TIMEOUT_MS);
        System.out.println("new page connect!");

        JpcapCaptor captor = null;
        try {
            final NetworkInterface[] devices = JpcapCaptor.getDeviceList();
            if (devices == null || devices.length <= CAPTURE_DEVICE_INDEX) {
                System.out.println("fail to display the data of network interface--"
                        + "no device at index " + CAPTURE_DEVICE_INDEX);
                return;
            }
            NetworkInterface networkInterface = devices[CAPTURE_DEVICE_INDEX];
            System.out.println("Device get!");
            captor = JpcapCaptor.openDevice(networkInterface, 65535, false, 20);
            System.out.println("Device activated!");
            // NOTE(review): the filter is "arp" while Receiver singles out
            // TCPPacket instances - confirm that this filter is intended.
            captor.setFilter("arp", true);
            // A count of -1 is passed, so this call is not expected to return
            // until the capture loop is broken or fails.
            captor.loopPacket(-1, new Receiver());
            System.out.println("Captor activated!");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("fail to display the data of network interface--" + e);
        } finally {
            if (captor != null) {
                captor.close();
            }
        }
    }

    /**
     * Downloads and parses {@code url}.
     *
     * @return the parsed document, or {@code null} if the request failed
     *         (the stack trace is printed)
     */
    private static Document fetch(String url) {
        try {
            return Jsoup.connect(url)
                    .userAgent(USER_AGENT)
                    .ignoreContentType(true)
                    .timeout(TIMEOUT_MS)
                    .get();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Resolves a site-relative href against the site root without doubling the slash. */
    private static String absoluteUrl(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        String path = href.startsWith("/") ? href.substring(1) : href;
        return SITE_ROOT + path;
    }

    /**
     * Turns a genre link text such as "Action " into the lower-case token
     * used in the search URL by dropping trailing non-alphanumeric characters.
     */
    private static String normalizeGenre(String linkText) {
        int end = linkText.length();
        while (end > 0 && !Character.isLetterOrDigit(linkText.charAt(end - 1))) {
            end--;
        }
        return linkText.substring(0, end).trim().toLowerCase(java.util.Locale.ROOT);
    }

    /** Concatenates the text of every element, inserting {@code separator} once output is non-empty. */
    private static String joinTexts(Elements elements, char separator) {
        StringBuilder joined = new StringBuilder();
        for (Element element : elements) {
            if (joined.length() > 0) {
                joined.append(separator);
            }
            joined.append(element.text());
        }
        return joined.toString();
    }
}
import jpcap.PacketReceiver;
import jpcap.packet.Packet;
import jpcap.packet.TCPPacket;
/**
 * Jpcap packet callback that echoes captured packets to standard output.
 * Every packet is printed once; a packet that is a {@link TCPPacket} is
 * printed a second time.
 */
public class Receiver implements PacketReceiver {

    /** Creates a receiver without handling any packet. */
    public Receiver() {
    }

    /**
     * Creates a receiver and immediately hands it {@code packet}.
     *
     * @param packet the packet to process right away
     */
    public Receiver(Packet packet) {
        this.receivePacket(packet);
    }

    /**
     * Prints the packet, and prints it again when it is a TCP packet.
     *
     * @param packet the captured packet
     */
    @Override
    public void receivePacket(Packet packet) {
        System.out.println(packet);

        // Original note: the destination address should be 54.230.66.171.
        if (!(packet instanceof TCPPacket)) {
            return;
        }

        TCPPacket segment = (TCPPacket) packet;
        System.out.println(segment);

        /*
         * Disabled payload dump, kept for reference: it printed the
         * destination address and, only for connections to the IMDb video
         * host, the payload as characters.
         *
         * System.out.println("ip address----");
         * System.out.println("dst --" + segment.dst_ip);
         * if (segment.dst_ip.toString().startsWith("54.230.")) {
         *     for (int i = 0; i < segment.data.length; i++) {
         *         System.out.print((char) segment.data[i]);
         *     }
         * }
         */
    }
}