87,907
社区成员
发帖
与我相关
我的任务
分享
import java.io.*;
import java.net.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads a page and writes a copy of its markup to a local file with one
 * tag run, CSS rule or script statement per line.
 *
 * <p>The page is read line by line and the lines are joined with single
 * spaces. The joined text is then broken into output lines:
 * <ul>
 *   <li>in ordinary markup, before every {@code <} that follows a {@code >}
 *       (optionally separated by whitespace); the whitespace stays at the end
 *       of the preceding line;</li>
 *   <li>inside a {@code <style>} element, after every <code>}</code>;</li>
 *   <li>inside a {@code <script>} element, after every <code>{</code>,
 *       <code>}</code> and {@code ;}, and once more right before
 *       {@code </script>} (that line may be empty).</li>
 * </ul>
 * Whatever is left after the last break is written as the final line, exactly
 * once, unless it consists of whitespace only.
 */
public class CreateHTML {

    private static final String STYLE_OPEN = "<style";
    private static final String STYLE_CLOSE = "</style>";
    private static final String SCRIPT_OPEN = "<script";
    private static final String SCRIPT_CLOSE = "</script>";

    /** Extracts the charset parameter from a Content-Type header value. */
    private static final Pattern CHARSET_PARAM =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

    public static void main(String[] args) {
        CreateHTML uc = new CreateHTML();
        uc.creatHTML("http://www.sina.com/", "c:\\shengchengdeHTML.html");
    }

    /**
     * Fetches {@code webURL} and writes the line-broken markup to {@code local}.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and are not
     * rethrown. The page is downloaded before the output file is opened, so a
     * failed download leaves an existing output file untouched.
     *
     * @param webURL URL of the page to fetch (any protocol {@link URL} supports)
     * @param local  path of the file to create or overwrite
     */
    public void creatHTML(String webURL, String local) {
        String html;
        Charset charset;
        try {
            URLConnection conn = new URL(webURL).openConnection();
            charset = charsetOf(conn);
            html = readAll(conn, charset);
        } catch (IOException e) { // includes MalformedURLException
            e.printStackTrace();
            return;
        }

        // The output is encoded with the charset used for decoding, so the
        // characters are written back in the encoding they arrived in.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(local), charset))) {
            for (String line : formatHtml(html)) {
                addLine(line, bw);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the charset named in the connection's Content-Type header, or the
     * platform default when the header is missing, has no charset parameter, or
     * names a charset this JVM does not know.
     */
    private static Charset charsetOf(URLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            Matcher m = CHARSET_PARAM.matcher(contentType);
            if (m.find()) {
                try {
                    return Charset.forName(m.group(1));
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: use the default below.
                }
            }
        }
        return Charset.defaultCharset();
    }

    /** Reads the whole response, replacing every line break with a single space. */
    private static String readAll(URLConnection conn, Charset charset) throws IOException {
        StringBuilder document = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                document.append(line).append(' ');
            }
        }
        return document.toString();
    }

    /** Splits the joined page text into output lines as described on the class. */
    private static List<String> formatHtml(String html) {
        List<String> lines = new ArrayList<>();
        int length = html.length();
        int start = 0;            // first character of the line being built
        int pos = 0;              // scan cursor, never before start
        boolean freshLine = true; // true only right after a tag boundary (or at the very start)

        while (pos < length) {
            if (freshLine) {
                // Element detection runs once per line start; re-running it on
                // an unchanged start is what could loop forever.
                freshLine = false;
                if (startsWithIgnoreCase(html, start, STYLE_OPEN)) {
                    start = splitStyle(html, start, lines);
                    pos = start;
                    continue;
                }
                if (startsWithIgnoreCase(html, start, SCRIPT_OPEN)) {
                    start = splitScript(html, start, lines);
                    pos = start;
                    continue;
                }
            }
            if (html.charAt(pos) == '>') {
                int next = skipWhitespace(html, pos + 1);
                if (next < length && html.charAt(next) == '<') {
                    lines.add(html.substring(start, next));
                    start = next;
                    pos = next;
                    freshLine = true;
                    continue;
                }
            }
            pos++;
        }

        // Emit the unfinished last line once, unless it is only whitespace.
        if (skipWhitespace(html, start) < length) {
            lines.add(html.substring(start));
        }
        return lines;
    }

    /**
     * Breaks a style element after every closing brace, stopping at
     * {@code </style>} or at the end of the text.
     *
     * @return index of the first character not yet emitted
     */
    private static int splitStyle(String html, int start, List<String> lines) {
        int lineStart = start;
        for (int pos = start; pos < html.length(); pos++) {
            if (startsWithIgnoreCase(html, pos, STYLE_CLOSE)) {
                break;
            }
            if (html.charAt(pos) == '}') {
                lines.add(html.substring(lineStart, pos + 1));
                lineStart = pos + 1;
            }
        }
        return lineStart;
    }

    /**
     * Breaks a script element after every brace and semicolon. When
     * {@code </script>} is found, the text in front of it is emitted as its own
     * line, even if that text is empty.
     *
     * @return index of {@code </script>}, or of the first character not yet
     *         emitted when the element is not closed
     */
    private static int splitScript(String html, int start, List<String> lines) {
        int lineStart = start;
        for (int pos = start; pos < html.length(); pos++) {
            if (startsWithIgnoreCase(html, pos, SCRIPT_CLOSE)) {
                lines.add(html.substring(lineStart, pos));
                return pos;
            }
            char c = html.charAt(pos);
            if (c == '{' || c == '}' || c == ';') {
                lines.add(html.substring(lineStart, pos + 1));
                lineStart = pos + 1;
            }
        }
        return lineStart;
    }

    /** Case-insensitive prefix test at {@code offset}; false when the text is too short. */
    private static boolean startsWithIgnoreCase(String text, int offset, String prefix) {
        return text.regionMatches(true, offset, prefix, 0, prefix.length());
    }

    /** Returns the index of the first non-whitespace character at or after {@code from}. */
    private static int skipWhitespace(String text, int from) {
        int pos = from;
        while (pos < text.length() && isWhitespace(text.charAt(pos))) {
            pos++;
        }
        return pos;
    }

    /** Same character set as the regex class {@code \s}: space, tab, LF, VT, FF, CR. */
    private static boolean isWhitespace(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\u000B' || c == '\f' || c == '\r';
    }

    /** Writes one line followed by the platform line separator. */
    private void addLine(String strLine, BufferedWriter bw) throws IOException {
        bw.write(strLine);
        bw.newLine();
    }
}