急~~~java解析html输出xml格式文档问题

cccpu 2009-03-13 04:40:28

小弟在做一个项目,需要将html页面里的表格(包括格式和数据)保存为xml格式,遇到一点小问题,需要求助!
代码:
我现在的页面里,有的input带有readOnly='true',有的input没有。取其它属性的时候都能取到,
比如取width:a.getAttribute(HTML.Attribute.WIDTH);这样就可以取到。但是当我取readOnly属性的时候,
在HTML.Attribute后面打点,找不到readOnly这个常量。请教怎样才能取到readOnly属性?



package xml;



import java.io.BufferedReader;

import java.io.FileReader;

import java.io.IOException;

import java.io.StringReader;



import javax.swing.text.MutableAttributeSet;

import javax.swing.text.html.HTML;

import javax.swing.text.html.HTMLEditorKit;

import javax.swing.text.html.parser.ParserDelegator;



import xml.ParseHtml.Callback;



import org.jdom.Document;

import org.jdom.Element;

import org.jdom.output.Format;

import org.jdom.output.XMLOutputter;



/**
 * Reads an HTML file, rebuilds its table / tr / td / text-input structure as a
 * JDOM document and prints that document as pretty-printed XML on standard output.
 */
public class MyHTMLToXML {

	/** JDOM document that receives the converted table structure. */
	private Document doc=new Document();

	/**
	 * Entry point: converts the file {@code f:\show.html} and prints the result.
	 *
	 * @param args ignored
	 */
	public static void main(String args[]) {
		String filename = "f:\\show.html";
		try {
			String sHtml = readAll(filename);
			MyHTMLToXML my=new MyHTMLToXML();
			my.startParse(sHtml);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads the complete content of a text file.
	 * The file is decoded with the platform default charset (FileReader).
	 *
	 * @param filename path of the file to read
	 * @return the whole file content, without any padding
	 * @throws IOException if the file cannot be opened or read
	 */
	private static String readAll(String filename) throws IOException {
		BufferedReader brd = new BufferedReader(new FileReader(filename));
		try {
			StringBuilder text = new StringBuilder();
			char[] buf = new char[8192];
			int n;
			// Loop until end of file: a single read() call may return fewer
			// characters than requested and never reports the real length
			// through the buffer itself.
			while ((n = brd.read(buf)) != -1) {
				text.append(buf, 0, n);
			}
			return text.toString();
		} finally {
			brd.close();
		}
	}

	/**
	 * Parses the given HTML text into {@link #doc} under a {@code root} element
	 * and writes the document to {@code System.out}.
	 *
	 * @param sHtml HTML source text
	 */
	private void startParse(String sHtml) {
		try {
			Element root=new Element("root");
			doc.setRootElement(root);

			ParserDelegator parser = new ParserDelegator();
			HTMLEditorKit.ParserCallback callback = new Callback(root);
			parser.parse(new StringReader(sHtml), callback, true);

			// Outputter used to serialize the JDOM document.
			XMLOutputter outp = new XMLOutputter();
			// Pretty-print the document.
			Format format=Format.getPrettyFormat();
			// The default encoding is UTF-8, which showed Chinese text as
			// garbage on the author's console, hence GBK.
			format.setEncoding("GBK");
			outp.setFormat(format);
			outp.output(doc,System.out);
		} catch (Exception e) {
			// Covers parser failures as well as IOException from output().
			e.printStackTrace();
		}
	}

	/**
	 * Parser callback that mirrors table, tr, td and text-input tags into JDOM
	 * elements below {@code root}. All rows are attached to the first child of
	 * {@code root}, i.e. the first table that was encountered.
	 */
	class Callback extends HTMLEditorKit.ParserCallback {

		private Document doc;

		private Element root;

		/** Running number used in the progress messages. */
		private int count = 1;

		/**
		 * Creates a callback for a document; its root element, when it has one,
		 * is used as the parent of the generated tables.
		 *
		 * @param _doc target document
		 */
		public Callback(Document _doc){
			this.doc=_doc;
			if (_doc != null && _doc.hasRootElement()) {
				this.root=_doc.getRootElement();
			}
		}

		/**
		 * Creates a callback that adds the generated tables to the given element.
		 *
		 * @param _root parent element for the generated tables
		 */
		public Callback(Element _root){
			this.root=_root;
		}

		/**
		 * Handles tags without a body. For {@code <input type="text" id="...">}
		 * an {@code input} element is added to every already recorded
		 * {@code td} whose id equals the input's id.
		 */
		@Override
		public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
			if (t.equals(HTML.Tag.TABLE)) {
				String name = text(a.getAttribute(HTML.Attribute.NAME));
				System.out.println("No." + count + "    table  name=" + name);
				count++;
			}
			if (t.equals(HTML.Tag.INPUT)) {
				String type = text(a.getAttribute(HTML.Attribute.TYPE));
				System.out.println("No." + count + "   input   type=" + type);
				String id = text(a.getAttribute(HTML.Attribute.ID));
				Element table = firstTable();
				// "text".equals(type) is null-safe: inputs without a type are skipped.
				if (table != null && id != null && "text".equals(type)) {
					for (int i = 0; i < table.getChildren().size(); i++) {
						Element tr = (Element) table.getChildren().get(i);
						for (int j = 0; j < tr.getChildren().size(); j++) {
							Element td = (Element) tr.getChildren().get(j);
							if (id.equals(td.getAttributeValue("id"))) {
								td.addContent(buildTextInput(a, id));
							}
						}
					}
				}
				count++;
			}
		}

		/**
		 * Handles opening tags: a {@code table} is added to the root, a
		 * {@code tr} is added to the first table, and a {@code td} is added to
		 * every row whose id equals the 4th character of the td's id.
		 */
		@Override
		public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
			if (t.equals(HTML.Tag.TABLE)) {
				String name = text(a.getAttribute(HTML.Attribute.NAME));
				if (root != null) {
					root.addContent(new Element("table"));
				}
				System.out.println("No." + count + "    table  name=" + name);
				count++;
			}
			if(t.equals(HTML.Tag.TR)){
				String id = text(a.getAttribute(HTML.Attribute.ID));
				Element table = firstTable();
				if (table != null) {
					Element tr = new Element("tr");
					table.addContent(tr);
					// JDOM rejects null attribute values, so rows without an id
					// are recorded without the attribute.
					if (id != null) {
						tr.setAttribute("id",id);
					}
				}
				count++;
			}
			if (t.equals(HTML.Tag.TD)) {
				String id = text(a.getAttribute(HTML.Attribute.ID));
				Element table = firstTable();
				// The row key is the 4th character of the cell id; shorter ids
				// cannot match any row.
				String rowKey = (id != null && id.length() >= 4) ? id.substring(3,4) : null;
				if (table != null && rowKey != null) {
					for (int i = 0; i < table.getChildren().size(); i++) {
						Element tr = (Element) table.getChildren().get(i);
						if (rowKey.equals(tr.getAttributeValue("id"))) {
							Element td = new Element("td");
							tr.addContent(td);
							td.setAttribute("id",id);
						}
					}
				}
				System.out.println("No." + count + "   td   id=" + id);
				count++;
			}
		}

		/**
		 * Builds the XML {@code input} element for a text input, copying the
		 * optional width, style, value and readonly attributes when present.
		 */
		private Element buildTextInput(MutableAttributeSet a, String id) {
			Element input = new Element("input");
			input.setAttribute("id",id);
			input.setAttribute("type","text");
			input.setAttribute("size","10");
			copyIfPresent(input, "width", a.getAttribute(HTML.Attribute.WIDTH));
			copyIfPresent(input, "style", a.getAttribute(HTML.Attribute.STYLE));
			copyIfPresent(input, "value", a.getAttribute(HTML.Attribute.VALUE));
			// HTML.Attribute has no constant for readonly, so the attribute is
			// located by its name instead.
			Object readOnly = findByName(a, "readonly");
			if (readOnly != null) {
				String value = readOnly.toString();
				// A value-less attribute is reported with the parser's
				// placeholder value; record it as "true".
				if (HTML.NULL_ATTRIBUTE_VALUE.equals(value)) {
					value = "true";
				}
				input.setAttribute("readonly", value);
			}
			return input;
		}

		/**
		 * Returns the value of the attribute whose key, converted to a string,
		 * equals {@code name} ignoring case, or {@code null} if there is none.
		 */
		private Object findByName(MutableAttributeSet a, String name) {
			java.util.Enumeration<?> keys = a.getAttributeNames();
			while (keys.hasMoreElements()) {
				Object key = keys.nextElement();
				if (name.equalsIgnoreCase(String.valueOf(key))) {
					return a.getAttribute(key);
				}
			}
			return null;
		}

		/** Sets {@code name} on {@code target} only when {@code value} is not null. */
		private void copyIfPresent(Element target, String name, Object value) {
			if (value != null) {
				target.setAttribute(name, value.toString());
			}
		}

		/** Returns the first child of the root element, or null if there is none yet. */
		private Element firstTable() {
			if (root == null || root.getChildren().isEmpty()) {
				return null;
			}
			return (Element) root.getChildren().get(0);
		}

		/** Converts an attribute value to a string, keeping null as null. */
		private String text(Object value) {
			return value == null ? null : value.toString();
		}
	}
}