62,614
社区成员
发帖
与我相关
我的任务
分享
package com.test;
import java.io.*;
import java.net.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that downloads a web page and prints the anchor tags found in HTML text.
 *
 * <p>All matching is done with regular expressions on raw text; no HTML parsing is
 * performed, so only anchors written literally as {@code <a href="...">} are found.
 */
public class du_wangye_0100 {

    /**
     * Opening part of an anchor, from {@code <a href="} through the closing quote of the
     * href value. The value is matched with {@code [^"\s]+} so the match always stops at
     * the first closing quote, even when another attribute follows without whitespace.
     */
    private static final Pattern HREF_PREFIX = Pattern.compile("<a href=\"[^\"\\s]+\"");

    /**
     * One complete anchor element, from {@code <a href="...} through its {@code </a>}.
     * The body uses the reluctant {@code .*?} so each match ends at the nearest
     * {@code </a>}; a greedy quantifier would merge several anchors into a single match.
     */
    private static final Pattern ANCHOR = Pattern.compile("<a href=\"[^\"]*\"[^>]*>.*?</a>");

    /**
     * Downloads {@code http://www.qq163.com} decoded as GBK, prints the href prefix of
     * every anchor found in it, then prints every complete anchor of a fixed sample string.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be downloaded or decoded
     */
    public static void main(String[] args) throws IOException {
        String htmlurl = "http://www.qq163.com";
        String tmp = readhtml(htmlurl, "gbk");
        get_a(tmp);
        String str = "<a href=\"http://www.3533.com/\" target=\"_blank\">手机世界</a> ┊ <a href=\"http://www.hao123.com/\" target=\"_blank\">hao123网址之家</a> ┊ <a href=\"http://www.3533.com/phone/\">手机大全</a> ┊ <a href=\"http://www.ip138.com/\">iP查询</a> ┊ <a href=\"http://game.3533.com/zhuti/\" >手机主题</a>";
        get_links(str);
    }

    /**
     * Reads the whole document at {@code htmlurl} and returns it as one string.
     *
     * <p>Lines are concatenated exactly as returned by {@link BufferedReader#readLine()},
     * i.e. without their line terminators.
     *
     * @param htmlurl address of the document to download
     * @param charset name of the charset used to decode the response bytes
     * @return the document text with line terminators removed
     * @throws IOException if the URL is malformed, the charset is unsupported, or reading fails
     */
    static String readhtml(String htmlurl, String charset) throws IOException {
        StringBuilder sb = new StringBuilder();
        URL url = new URL(htmlurl);
        // try-with-resources closes the stream even when the charset lookup or a read throws.
        try (InputStream in = url.openStream();
             BufferedReader br = new BufferedReader(new InputStreamReader(in, charset))) {
            System.out.println("开始读取网页");
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

    /**
     * Prints, one per line, the {@code <a href="...")} prefix of every anchor in {@code line}.
     *
     * @param line HTML text to scan
     */
    static void get_a(String line) {
        Matcher m = HREF_PREFIX.matcher(line);
        while (m.find()) {
            System.out.println(m.group());
        }
    }

    /**
     * Prints, one per line, every complete {@code <a href="...">...</a>} element in {@code html}.
     *
     * @param html HTML text to scan
     */
    static void get_links(String html) {
        Matcher m = ANCHOR.matcher(html);
        while (m.find()) {
            System.out.println(m.group());
        }
    }
}
/**
 * Prints every {@code <a href=...>...</a>} element found in a fixed sample string,
 * one match per line.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String sample = "sfsf<a href=\"http://www.3533.com/\" target=\"_blank\">手机世界</a> ┊ <a href=\"http://www.hao123.com/\" target=\"_blank\">hao123网址之家</a>sfdsf";
    // Reluctant quantifiers keep each match inside a single anchor element.
    final Matcher anchors = Pattern.compile("<a href=.*?>.*?</a>").matcher(sample);
    for (boolean found = anchors.find(); found; found = anchors.find()) {
        System.out.println(anchors.group());
    }
}
// Fetch and parse the page with Jsoup; 30000 is passed as Jsoup.parse's second
// (timeout) argument.
URL url = new URL("http://www.qq163.com");
Document doc = Jsoup.parse(url, 30000);
// Select every <a> element in the parsed document.
Elements links = doc.select("a");
// For each anchor print its href attribute, its target attribute, and its text,
// each on its own line and in that order.
for (int i = 0; i < links.size(); i++) {
    Element anchor = links.get(i);
    String href = anchor.attr("href");
    String target = anchor.attr("target");
    String label = anchor.text();
    System.out.println(href);
    System.out.println(target);
    System.out.println(label);
}