67,513
社区成员
发帖
与我相关
我的任务
分享
<?xml version="1.0" encoding="UTF-8"?>
<!--
Scraper configuration (the element vocabulary looks like Web-Harvest; confirm).
Downloads one page, selects every div whose class is 'winnerLink', and writes
one <info> entry per selected node into fz/2.xml between literal <catalog>
and </catalog> tags.
-->
<config>
<!-- Includes functions.xml; its contents are not shown here. -->
<include path="functions.xml" />
<file action="write" path="fz/2.xml" charset="UTF-8">
<!-- Literal opening tag of the output document, wrapped in CDATA so it is treated as text rather than config markup. -->
<![CDATA[<catalog>]]>
<!-- NOTE(review): <empty> presumably evaluates its body without contributing anything to the file content, so only the variable definition takes effect; confirm against the scraper documentation. -->
<empty>
<!-- priceList holds the result of the XPath applied to the fetched page after its HTML has been converted to XML. -->
<var-def name="priceList" id="priceList">
<xpath expression="//div[@class='winnerLink']">
<html-to-xml>
<http url="http://www.fenzhi.com/xsc1p1.html" />
</html-to-xml>
</xpath>
</var-def>
</empty>
<!-- Iterates over priceList, binding each element to 'item'. The index variable 'i' is declared but not referenced in the body. -->
<loop item="item" index="i">
<list>
<var name="priceList"></var>
</list>
<body>
<!-- Passes the current item to the query as the external variable $item. The query emits an <info> element whose name attribute and <name> child both carry the whitespace-normalized atomized value of the item. -->
<xquery>
<xq-param name="item" type="node()">
<var name="item" />
</xq-param>
<xq-expression>
<![CDATA[
declare variable $item as node() external;
let $name :=data($item)
return
<info name='{normalize-space($name)}'>
<name>{normalize-space($name)}</name>
</info>
]]>
</xq-expression>
</xquery>
</body>
</loop>
<!-- Literal closing tag matching the <catalog> text written at the top of the file body. -->
<![CDATA[</catalog>]]>
</file>
</config>
/**
 * Loads the scraper configuration from a fixed path, executes it with debug
 * output enabled, and prints how long the execution took.
 *
 * <p>Any exception raised while loading or executing the configuration is
 * caught and its stack trace is printed; it is not rethrown.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    try {
        ScraperConfiguration config = new ScraperConfiguration(
                "H:\\workspace\\nutch\\src\\com\\jsq\\nutch\\jianjie.xml");
        // Working directory: the XML produced by the crawl is saved here.
        Scraper scraper = new Scraper(config, "E:\\tmp");
        scraper.setDebug(true);

        // Take the timestamp BEFORE execute() so it marks the start of the run;
        // the original captured it afterwards and never read it.
        long startTime = System.currentTimeMillis();
        scraper.execute();
        long elapsedMillis = System.currentTimeMillis() - startTime;
        System.out.println("Scrape finished in " + elapsedMillis + " ms");
    } catch (Exception e) {
        // Entry-point boundary: report the failure instead of propagating it.
        e.printStackTrace();
    }
}