62,614
社区成员
发帖
与我相关
我的任务
分享
/**
 * Downloads a fixed list of Sina news articles concurrently and compares how
 * often two words occur in each page.
 *
 * <p>One thread is started per URL. Each thread reads its page line by line,
 * decoding it as GB2312, remembers the first {@code <h1 id="artibodyTitle">}
 * heading it finds as the title, and counts the matches of a one-character
 * word (U+515A) and a two-character word (U+56FD U+5BB6). When the
 * one-character word occurs more often than the two-character word, the
 * thread throws a {@link RuntimeException} whose message carries the title,
 * the URL and both counts; otherwise it prints one line reporting that the
 * page is good. An {@link IOException} is reported with a stack trace and
 * ends only the thread that hit it.
 *
 * <p>This method only starts the threads; it does not wait for them.
 *
 * @param args command-line arguments (unused)
 * @throws Exception kept from the original signature
 */
public static void main(String[] args) throws Exception {
    String[] urls = {
        "http://mil.news.sina.com.cn/2012-04-10/0428687123.html",
        "http://mil.news.sina.com.cn/2012-04-12/0731687387.html",
        "http://news.sina.com.cn/c/2012-04-13/044224264609.shtml"
    };
    final Pattern titlePattern = Pattern
            .compile("<h1 id=\"artibodyTitle\".*?>(.*?)</h1>");
    final Pattern wordCountPattern = Pattern.compile("\u515a|\u56fd\u5bb6");
    for (final String url : urls) {
        new Thread() {
            @Override
            public void run() {
                // The raw stream is its own resource so the connection is
                // released even when the GB2312 decoder cannot be created.
                try (java.io.InputStream in = new URL(url).openStream();
                        BufferedReader reader = new BufferedReader(
                                new InputStreamReader(in, "GB2312"))) {
                    String title = null;
                    int shortWordCount = 0;
                    int longWordCount = 0;
                    String line;
                    while ((line = reader.readLine()) != null) {
                        // Only the first heading found is kept as the title.
                        if (title == null) {
                            Matcher titleMatcher = titlePattern.matcher(line);
                            if (titleMatcher.find()) {
                                title = titleMatcher.group(1);
                            }
                        }
                        Matcher wordCountMatcher = wordCountPattern.matcher(line);
                        while (wordCountMatcher.find()) {
                            // The pattern has exactly two alternatives, one
                            // and two characters long, so the match length
                            // tells them apart.
                            if (wordCountMatcher.group().length() == 1) {
                                shortWordCount++;
                            } else {
                                longWordCount++;
                            }
                        }
                    }
                    if (shortWordCount > longWordCount) {
                        throw new RuntimeException(
                                String.format("%s[%s] \u515a:%d > \u56fd\u5bb6:%d",
                                        title,
                                        url,
                                        shortWordCount,
                                        longWordCount));
                    }
                    // %n ends the line so reports from different threads do
                    // not run together.
                    System.out.printf("%s[%s] is good!%n", title, url);
                } catch (IOException ex) {
                    ex.printStackTrace();
                }
            }
        }.start();
    }
}