// 81,076
// community members




package xml;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import xml.ParseHtml.Callback;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
/**
 * Reads an HTML file, walks it with the Swing HTML parser and mirrors its
 * {@code table} / {@code tr} / {@code td} / text {@code input} structure into
 * a JDOM document, which is then pretty-printed to standard output.
 */
public class MyHTMLToXML {

    /** Path used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_FILE = "f:\\show.html";

    /** Document that receives the generated {@code root} element. */
    private final Document doc = new Document();

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the HTML file to convert. When
     *             absent the original hard-coded path is used.
     */
    public static void main(String args[]) {
        String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        try {
            String sHtml = readAll(filename);
            MyHTMLToXML my = new MyHTMLToXML();
            my.startParse(sHtml);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the complete content of a text file using the platform default
     * charset (that is what {@link FileReader} does).
     *
     * @param filename path of the file to read
     * @return the whole file content, without any padding characters
     * @throws IOException if the file cannot be opened or read
     */
    private static String readAll(String filename) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(filename));
        try {
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[8192];
            int read;
            // A single read() may return fewer characters than requested, so
            // keep reading until end of stream and append only what was read.
            while ((read = reader.read(buffer)) != -1) {
                text.append(buffer, 0, read);
            }
            return text.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Parses the given HTML text into {@link #doc} and prints the resulting
     * XML to {@code System.out}. Any failure is reported with a stack trace.
     *
     * @param sHtml HTML markup to convert
     */
    private void startParse(String sHtml) {
        try {
            Element root = new Element("root");
            doc.setRootElement(root);

            ParserDelegator parser = new ParserDelegator();
            HTMLEditorKit.ParserCallback callback = new Callback(root);
            parser.parse(new StringReader(sHtml), callback, true);

            // Pretty-printed output of the JDOM document.
            Format format = Format.getPrettyFormat();
            // Original author's note: with the default UTF-8 encoding Chinese
            // text showed up garbled, hence GBK.
            format.setEncoding("GBK");
            XMLOutputter outp = new XMLOutputter();
            outp.setFormat(format);
            outp.output(doc, System.out);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parser callback that builds the XML tree under a given root element and
     * logs every handled tag with a running number.
     *
     * <p>All rows, cells and inputs are attached to the first child element of
     * the root, i.e. the first table that was encountered.
     */
    class Callback extends HTMLEditorKit.ParserCallback {

        private Document doc;
        private Element root;

        /** Running number printed in front of each log line. */
        private int count = 1;

        /**
         * Creates a callback that writes below the root element of the given
         * document. If the document has no root element yet, the callback only
         * logs and builds nothing.
         *
         * @param _doc target document
         */
        public Callback(Document _doc) {
            this.doc = _doc;
            if (_doc != null && _doc.hasRootElement()) {
                this.root = _doc.getRootElement();
            }
        }

        /**
         * Creates a callback that writes below the given element.
         *
         * @param _root element that receives the generated {@code table} elements
         */
        public Callback(Element _root) {
            this.root = _root;
        }

        /**
         * Handles empty tags. A {@code table} is only logged; an
         * {@code input} of type {@code text} that carries an id is copied into
         * every already-built {@code td} with the same id.
         */
        @Override
        public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            if (t.equals(HTML.Tag.TABLE)) {
                String name = attr(a, HTML.Attribute.NAME);
                System.out.println("No." + count + " table name=" + name);
                count++;
            }
            if (t.equals(HTML.Tag.INPUT)) {
                String type = attr(a, HTML.Attribute.TYPE);
                System.out.println("No." + count + " input type=" + type);
                String id = attr(a, HTML.Attribute.ID);
                Element table = targetTable();
                // "text".equals(type) tolerates an input without a type attribute.
                if (table != null && id != null && "text".equals(type)) {
                    for (Object row : table.getChildren()) {
                        for (Object cell : ((Element) row).getChildren()) {
                            Element td = (Element) cell;
                            if (id.equals(td.getAttributeValue("id"))) {
                                td.addContent(buildTextInput(id, a));
                            }
                        }
                    }
                }
                count++;
            }
        }

        /**
         * Handles opening tags: a {@code table} becomes a new child of the
         * root, a {@code tr} is appended to the first table, and a {@code td}
         * is appended to every row whose id equals the fourth character of the
         * cell's id.
         */
        @Override
        public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            if (t.equals(HTML.Tag.TABLE)) {
                String name = attr(a, HTML.Attribute.NAME);
                if (root != null) {
                    root.addContent(new Element("table"));
                }
                System.out.println("No." + count + " table name=" + name);
                count++;
            }
            if (t.equals(HTML.Tag.TR)) {
                Element table = targetTable();
                if (table != null) {
                    Element tr = new Element("tr");
                    String id = attr(a, HTML.Attribute.ID);
                    // JDOM rejects null attribute values, so rows without an
                    // id are added without the attribute.
                    if (id != null) {
                        tr.setAttribute("id", id);
                    }
                    table.addContent(tr);
                }
                count++;
            }
            if (t.equals(HTML.Tag.TD)) {
                String id = attr(a, HTML.Attribute.ID);
                Element table = targetTable();
                // The row key is the 4th character of the cell id; shorter ids
                // cannot name a row and are skipped instead of throwing.
                if (table != null && id != null && id.length() >= 4) {
                    String rowKey = id.substring(3, 4);
                    for (Object child : table.getChildren()) {
                        Element tr = (Element) child;
                        if (rowKey.equals(tr.getAttributeValue("id"))) {
                            Element td = new Element("td");
                            td.setAttribute("id", id);
                            tr.addContent(td);
                        }
                    }
                }
                System.out.println("No." + count + " td id=" + id);
                count++;
            }
        }

        /** Returns the first child element of the root, or null if there is none. */
        private Element targetTable() {
            if (root == null || root.getChildren().isEmpty()) {
                return null;
            }
            return (Element) root.getChildren().get(0);
        }

        /** Returns the attribute value as text, or null when the attribute is absent. */
        private String attr(MutableAttributeSet a, Object key) {
            Object value = a.getAttribute(key);
            return value == null ? null : value.toString();
        }

        /** Builds the XML {@code input} element for a text input with the given id. */
        private Element buildTextInput(String id, MutableAttributeSet a) {
            Element input = new Element("input");
            input.setAttribute("id", id);
            input.setAttribute("type", "text");
            input.setAttribute("size", "10");
            copyIfPresent(a, HTML.Attribute.WIDTH, input, "width");
            copyIfPresent(a, HTML.Attribute.STYLE, input, "style");
            copyIfPresent(a, HTML.Attribute.VALUE, input, "value");
            return input;
        }

        /** Copies one HTML attribute onto the target element when the tag carries it. */
        private void copyIfPresent(MutableAttributeSet a, Object key, Element target, String name) {
            String value = attr(a, key);
            if (value != null) {
                target.setAttribute(name, value);
            }
        }
    }
}