81,092
社区成员
发帖
与我相关
我的任务
分享
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.net.URL;
import java.net.URLConnection;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel.MapMode;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line scraper that downloads a range of puzzle pages, extracts the
 * non-zero cells of each puzzle table and appends them as comma-separated
 * lines to {@code result.xls}.
 *
 * <p>Settings are read from {@code config.txt} in the working directory using
 * {@code key=value} lines with the keys {@code path}, {@code startIndex} and
 * {@code maxIndex}. Pages are fetched one after another on the calling thread.
 */
public class PuzzleParsor {

    /** Immutable settings read from {@code config.txt}. */
    private static class Config {
        private final String solutionPath;
        private final int startIndex;
        private final int maxIndex;

        public Config(String solution, int startIndex, int maxIndex) {
            this.solutionPath = solution;
            this.startIndex = startIndex;
            this.maxIndex = maxIndex;
        }

        /** @return the URL prefix to which the puzzle number is appended */
        public String getSolutionPath() {
            return solutionPath;
        }

        /** @return the first puzzle number to fetch (inclusive) */
        public int getStartIndex() {
            return startIndex;
        }

        /** @return the last puzzle number to fetch (inclusive) */
        public int getMaxIndex() {
            return maxIndex;
        }
    }

    // Disabled constants kept from the original; nothing in this class references them.
    /*
    private static final int NORTH = 8;
    private static final int WEST = 16;
    private static final int EAST = 32;
    private static final int SOUTH = 64;
    */

    private static final String LINE_SEPARATOR = System.getProperty("line.separator");

    // Compiled once: these expressions never change between pages.
    private static final Pattern DIFFICULTY_PATTERN = Pattern.compile("Difficulty: (.*?)<br");
    private static final Pattern ROW_PATTERN = Pattern.compile("<tr class=\"arukone\">(.*?)</tr>");
    private static final Pattern CELL_PATTERN = Pattern.compile("<td class=\"[^>]*\">([^<]+)");

    /**
     * Reads the configuration, then fetches and records every puzzle from
     * {@code startIndex} to {@code maxIndex} inclusive.
     *
     * @param args ignored
     * @throws Exception if the configuration is invalid or any page cannot be
     *                   fetched, parsed or written
     */
    public static void main(String[] args) throws Exception {
        Config config = getConfig();
        String solutionPath = config.getSolutionPath();
        int maxIndex = config.getMaxIndex();
        for (int i = config.getStartIndex(); i <= maxIndex; i++) {
            output(extractPuzzleSolutionFromUrl(i, solutionPath + i));
        }
    }

    /**
     * Appends the given lines, UTF-8 encoded, to the end of {@code result.xls}.
     *
     * @param list already-terminated lines to append, in order
     * @throws Exception if the file cannot be opened or written
     */
    private static void output(List<String> list) throws Exception {
        StringBuilder text = new StringBuilder();
        for (String str : list) {
            text.append(str);
        }
        RandomAccessFile resultFile = new RandomAccessFile("result.xls", "rw");
        try {
            ByteBuffer bytes = Charset.forName("utf-8").encode(text.toString());
            long position = resultFile.length();
            // A single positional write is not guaranteed to consume the whole buffer.
            while (bytes.hasRemaining()) {
                position += resultFile.getChannel().write(bytes, position);
            }
        } finally {
            // Close even when encoding or writing fails.
            resultFile.close();
        }
    }

    /**
     * Downloads one puzzle page and turns its table into output lines of the
     * form {@code index,difficulty,value,row,column}; cells whose value is 0
     * are skipped.
     *
     * @param index   puzzle number, echoed to stdout and into every line
     * @param urlName full URL of the puzzle page
     * @return one terminated line per non-zero cell, in row-major order
     * @throws Exception if the page cannot be fetched or lacks the expected markers
     */
    private static List<String> extractPuzzleSolutionFromUrl(int index, String urlName) throws Exception {
        System.out.println("process:" + index);
        List<String> resultList = new ArrayList<String>();
        String content = extractPuzzleTableFromUrl(urlName);

        String difficulty = "";
        Matcher difficultyMatcher = DIFFICULTY_PATTERN.matcher(content);
        if (difficultyMatcher.find()) {
            difficulty = difficultyMatcher.group(1);
        }

        List<String> rowList = new ArrayList<String>();
        Matcher rowMatcher = ROW_PATTERN.matcher(content);
        while (rowMatcher.find()) {
            rowList.add(rowMatcher.group(1));
        }

        int[][] table = parseTableInfoToCharArray(rowList);
        for (int i = 0; i < table.length; i++) {
            for (int j = 0; j < table[i].length; j++) {
                if (table[i][j] != 0) {
                    resultList.add(index + "," + difficulty + "," + table[i][j] + "," + i + "," + j + LINE_SEPARATOR);
                }
            }
        }
        return resultList;
    }

    /**
     * Parses every table row into its numeric cells.
     *
     * @param rowList inner HTML of each table row
     * @return one int array per row, in the same order as {@code rowList}
     */
    private static int[][] parseTableInfoToCharArray(List<String> rowList) {
        int[][] array = new int[rowList.size()][];
        for (int i = 0; i < rowList.size(); i++) {
            array[i] = parseRowInfoToArray(rowList.get(i));
        }
        return array;
    }

    /**
     * Parses the cells of a single table row.
     *
     * @param info inner HTML of one row, i.e. a sequence of {@code <td ...>..</td>} cells
     * @return the integer shown in each cell; 0 for cells that are empty or
     *         whose text is not an integer
     */
    private static int[] parseRowInfoToArray(String info) {
        String[] cells = info.split("</td>");
        int[] result = new int[cells.length];
        for (int i = 0; i < cells.length; i++) {
            Matcher matcher = CELL_PATTERN.matcher(cells[i]);
            if (!matcher.find()) {
                continue;
            }
            // Surrounding whitespace would otherwise make parseInt fail.
            String value = matcher.group(1).trim();
            try {
                result[i] = Integer.parseInt(value);
            } catch (NumberFormatException ignored) {
                // Non-numeric filler (for example an HTML entity) is treated as an empty cell.
            }
        }
        return result;
    }

    /**
     * Downloads a page and returns the part between the {@code <!-- main -->}
     * and {@code <!-- footer -->} markers, with line breaks removed.
     *
     * @param urlName URL of the page to download
     * @return the text from the start marker (inclusive) to the end marker (exclusive)
     * @throws Exception if the download fails or either marker is missing
     */
    private static String extractPuzzleTableFromUrl(String urlName) throws Exception {
        URL url = new URL(urlName);
        URLConnection urlConnection = url.openConnection();
        InputStream inputStream = urlConnection.getInputStream();
        StringBuilder page = new StringBuilder();
        try {
            // NOTE(review): decodes with the platform default charset, as before;
            // confirm the site's encoding if non-ASCII text ever matters.
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
        } finally {
            // Release the connection's stream even when reading fails.
            inputStream.close();
        }

        String content = page.toString();
        int startIndex = content.indexOf("<!-- main -->");
        if (startIndex < 0) {
            throw new IllegalStateException("Marker <!-- main --> not found in " + urlName);
        }
        int endIndex = content.indexOf("<!-- footer -->", startIndex);
        if (endIndex < 0) {
            throw new IllegalStateException("Marker <!-- footer --> not found after <!-- main --> in " + urlName);
        }
        return content.substring(startIndex, endIndex);
    }

    /**
     * Loads {@code config.txt} (UTF-8) from the working directory.
     *
     * @return the parsed settings
     * @throws Exception if the file cannot be read, {@code path} is missing, or
     *                   {@code startIndex}/{@code maxIndex} is missing or not an integer
     */
    private static Config getConfig() throws Exception {
        String content;
        RandomAccessFile file = new RandomAccessFile("config.txt", "r");
        try {
            ByteBuffer buffer = file.getChannel().map(MapMode.READ_ONLY, 0, file.length());
            CharBuffer charBuffer = Charset.forName("utf-8").decode(buffer);
            content = charBuffer.toString();
        } finally {
            file.close();
        }

        String solution = extractInfo(content, LINE_SEPARATOR, "path");
        if (solution.length() == 0) {
            throw new IllegalArgumentException("config.txt: missing value for 'path'");
        }
        int maxIndex = parseIntSetting(content, "maxIndex");
        int startIndex = parseIntSetting(content, "startIndex");
        return new Config(solution, startIndex, maxIndex);
    }

    /** Reads an integer setting and reports which key was missing or malformed. */
    private static int parseIntSetting(String content, String name) {
        String raw = extractInfo(content, LINE_SEPARATOR, name);
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                    "config.txt: missing or non-numeric value for '" + name + "': \"" + raw + "\"", e);
        }
    }

    /**
     * Returns the value of the first {@code name=value} line in {@code content}.
     *
     * <p>The key must start a line (an optional byte-order mark U+FEFF before it
     * is tolerated). Any line-ending convention is accepted, and the last line
     * does not need a trailing line break.
     *
     * @param content       full configuration text
     * @param lineSeparator unused; kept so existing call sites stay valid
     * @param name          key to look up, matched literally
     * @return the trimmed value, or an empty string when the key is absent
     */
    private static String extractInfo(String content, String lineSeparator, String name) {
        Pattern settingPattern = Pattern.compile("(?m)^\\uFEFF?" + Pattern.quote(name) + "=(.*)$");
        Matcher settingMatcher = settingPattern.matcher(content);
        if (settingMatcher.find()) {
            return settingMatcher.group(1).trim();
        }
        return "";
    }
}
配置文件config.txt
path=http://www.menneske.no/arukone/5x5/eng/showpuzzle.html?number=
startIndex=1400
maxIndex=1413
没有使用多线程，你可以自己改；改的话需要把 output 方法同步。另外我没有写成真正的 Excel 文件，只是输出了逗号分隔的文本文件（文件名为 result.xls），用 Excel 也能打开。如果一定要生成真正的 Excel 文件，可以去网上找例子，还需要下载相应的 jar 包。