62,623
社区成员
发帖
与我相关
我的任务
分享
import java.net.*;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.*;
import javax.swing.SwingUtilities;
/**
 * A minimal link crawler.
 *
 * <p>Starting from a seed URL, every page whose reported content type contains
 * "html" is read line by line, each {@code href="..."} attribute is resolved
 * against the page URL, and previously unseen results are appended to
 * {@link #gl}. All state is static and unsynchronized.
 */
public class SimpleAccess {
    /**
     * URLs discovered so far, in discovery order. The list doubles as the
     * crawl queue: {@link #urlList(String)} walks it by index while
     * {@link #getGoodLinks(String)} appends to it.
     */
    public static ArrayList<String> gl = new ArrayList<>();

    /** Not read or written anywhere in this class; retained because it is public. */
    public static ArrayList line = new ArrayList();

    /**
     * Matches an href attribute with a single- or double-quoted value; the
     * value is capture group 2. Compiled once instead of once per input line,
     * and deliberately not anchored with greedy wildcards so that
     * {@code Matcher.find()} can report every link on a line.
     */
    private static final Pattern HREF =
            Pattern.compile("href\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    /**
     * Registers {@code url} as a seed and crawls outward from it.
     *
     * <p>Nothing happens if {@code url} is already in {@link #gl}. Otherwise the
     * URL is added and every entry of {@link #gl} is visited from index 0;
     * because visiting a page may append new entries, the loop only ends once
     * no unseen links remain. There is no depth or page limit.
     *
     * @param url absolute URL to start from
     * @throws Exception if any visited URL is malformed or cannot be read
     */
    public static void urlList(String url) throws Exception {
        if (gl.contains(url)) {
            return;
        }
        gl.add(url);
        // gl.size() is re-evaluated on every pass on purpose: the list grows while we iterate.
        for (int i = 0; i < gl.size(); i++) {
            getGoodLinks(gl.get(i));
        }
    }

    /**
     * Fetches {@code str} and, if it is an HTML document, appends every link
     * found in it to {@link #gl} (duplicates are skipped).
     *
     * <p>Resources that report no content type, or one that does not contain
     * "html", are ignored. The body is decoded with the platform default
     * charset.
     *
     * @param str absolute URL of the page to scan
     * @throws Exception if {@code str} is not a valid URL or the page cannot be read
     */
    public static void getGoodLinks(String str) throws Exception {
        URL myURL = new URL(str);
        URLConnection connection = myURL.openConnection();

        // getContentType() returns null when the type is unknown.
        String contentType = connection.getContentType();
        if (contentType == null
                || !contentType.toLowerCase(java.util.Locale.ROOT).contains("html")) {
            return;
        }

        System.out.println("type is html");
        System.out.println(str);

        // Read from the connection that was already opened for the content-type
        // check rather than issuing a second request, and always close it.
        try (BufferedReader in =
                new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                Matcher m = HREF.matcher(inputLine);
                while (m.find()) {
                    addLink(myURL, m.group(2));
                }
            }
        }
    }

    /** Resolves {@code href} against {@code base} and records it in {@link #gl} if it is new. */
    private static void addLink(URL base, String href) {
        String resolved;
        try {
            resolved = new URL(base, href).toString();
        } catch (MalformedURLException ignored) {
            // Pseudo-links such as "javascript:..." have no URL handler; one bad
            // href must not abort the scan of the rest of the page.
            return;
        }
        if (!gl.contains(resolved)) {
            gl.add(resolved);
        }
    }

    /**
     * Returns the number of URLs collected so far.
     *
     * @return current size of {@link #gl}
     */
    public int getSize() {
        return gl.size();
    }

    /**
     * Crawls from a fixed seed URL, then prints every collected URL, the text
     * "finished", and the total count.
     *
     * @param args ignored
     * @throws Exception if the crawl fails
     */
    public static void main(String[] args) throws Exception {
        SimpleAccess sa = new SimpleAccess();
        urlList("http://www.csdn.com");
        for (String link : gl) {
            System.out.println(link);
        }
        System.out.println("finished");
        int a = sa.getSize();
        System.out.println(a);
    }
}