// Parse the document named by the first command-line argument, decoding it as GB2312,
// and dump the re-serialized HTML of the parsed nodes to out.html.
Parser parser = new Parser (args[0]);
parser.setEncoding("GB2312");
// A null filter is passed to parse(); per the HTMLParser docs this should return all
// top-level nodes -- verify against the library version in use.
NodeList list = parser.parse (null);
File out = new File("out.html");
// NOTE(review): PrintWriter(OutputStream) encodes with the platform default charset,
// while the input is decoded as GB2312 -- confirm that this mismatch is intended.
PrintWriter writer = new PrintWriter(new FileOutputStream(out));
try
{
    writer.write(list.toHtml ());
}
finally
{
    // PrintWriter buffers its output and does not auto-flush; without close() the
    // data may never reach out.html and the file handle would leak.
    writer.close();
}
// Build one NodeClassFilter per HTMLParser node class of interest (text, links, applets,
// images, frames, scripts, forms, objects and remarks).
NodeFilter textFilter = new NodeClassFilter(TextNode.class);
NodeFilter linkFilter = new NodeClassFilter(LinkTag.class);
NodeFilter appletFilter = new NodeClassFilter(AppletTag.class);
NodeFilter imageFilter = new NodeClassFilter(ImageTag.class);
NodeFilter frameFilter = new NodeClassFilter(FrameTag.class);
NodeFilter scriptFilter = new NodeClassFilter(ScriptTag.class);
NodeFilter formFilter = new NodeClassFilter(FormTag.class);
NodeFilter objectFilter = new NodeClassFilter(ObjectTag.class);
NodeFilter remarkFilter = new NodeClassFilter(RemarkNode.class);
// meta tags are not handled for now
//NodeFilter metaFilter = new NodeClassFilter(MetaTag.class);
// Gather every TextNode known to myParser and print each non-blank one on its own line.
// NOTE(review): an earlier variant called myParser.extractAllNodesThatAre(TextNode.class)
// and was annotated "exception could be thrown here" -- confirm how the enclosing code
// handles a failure at this step.
nodes = myParser.extractAllNodesThatMatch(new NodeClassFilter(TextNode.class)).toNodeArray();
for (int idx = 0; idx < nodes.length; idx++)
{
    // The filter above only admits TextNode instances, so the cast is expected to hold.
    String text = ((TextNode) nodes[idx]).toPlainTextString().trim();
    // Skip nodes that are empty once surrounding whitespace is removed.
    if (!text.equals(""))
    {
        System.out.println(text);
    }
}
try
{
File ff = new File(sFileName);
InputStreamReader read = new InputStreamReader(new FileInputStream(ff),
sEncode);
BufferedReader ins = new BufferedReader(read);