62,614
社区成员
发帖
与我相关
我的任务
分享
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
...(中间部分省略)
<!-- {start:list -->
<div class="grid-view">
<ul class="cls" id="data-table">
<li>
<div class="img"><a target="_blank" href="http://test.com/abcdefg"> <img src="http://img.mall.test.com/abcd.jpg"> </a></div>
<div class="title">
<a class="ico-b2c" href="http://co.test.com/content/help/2009-08-06/113746233677.html#1" target="_blank" title="测试数据1!"></a><h3><a target="_blank" href="http://book.test.com/">沸腾15年</a></h3>
<a class="xb-icon-small-1 no_name"></a> <a class="xb-icon-small-2 no_name"></a>
</div>
<div class="bot">
价格:<strong>75.<span class="small">00</span></strong>
<a href="#" class="btn-im-online2" account="北京图书出版室"></a><br>
</div>
</li>
<li>
<div class="img"><a target="_blank" href="http://test.com/abcdefg2"> <img src="http://img.mall.test.com/abcd2.jpg"> </a></div>
<div class="title">
<a class="ico-b2c" href="http://co.test.com/content/help/2009-08-06/113746233677.html#1" target="_blank" title="测试数据2!"></a><h3><a target="_blank" href="http://book.test.com/">三国演义</a></h3>
<a class="xb-icon-small-1 no_name"></a> <a class="xb-icon-small-2 no_name"></a>
</div>
<div class="bot">
价格:<strong>255.<span class="small">00</span></strong>
<a href="#" class="btn-im-online2" account="北京图书出版室"></a><br>
</div>
</li>
...(类似的数据)
</ul>
</div>
<!-- }end:list -->
...(后面省略的数据)
/**
 * Reads the file c:\Test.txt, joins all of its lines into one string, splits that string
 * in front of every {@code <div class="grid-view">} opening tag, and for each image cell
 * found in a chunk prints two lines: the text captured after {@code href=} and the text
 * captured after {@code src=}.
 *
 * The captured text is the raw attribute text up to the closing {@code >} of the tag,
 * so surrounding quotes are part of what gets printed.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the file cannot be opened, read or closed
 */
public static void main(String[] args) throws IOException {
    // Compiled once up front: the expression is the same for every chunk.
    Pattern imgCellPattern = Pattern.compile(
            "<div class=\"img\">\\s*<a[^<>]*href=([^<>]*)/?>\\s*<img\\s*src=([^<>]*)/?>",
            Pattern.DOTALL);

    // Single-threaded accumulation, so the unsynchronized builder is sufficient.
    StringBuilder sb = new StringBuilder();

    // NOTE(review): FileReader decodes with the platform default charset; confirm that
    // it matches the encoding of the input file.
    BufferedReader br = new BufferedReader(new FileReader("c:\\Test.txt"));
    try {
        String str;
        while ((str = br.readLine()) != null) {
            // readLine() drops the line terminator, so lines are joined with no separator.
            sb.append(str);
        }
    } finally {
        // Release the file handle even when reading fails.
        br.close();
    }

    // Zero-width lookahead: the delimiter tag stays at the start of the chunk that follows it.
    String[] arr = sb.toString().split("(?=<div class=\"grid-view\">)");
    for (String s : arr) {
        Matcher m = imgCellPattern.matcher(s);
        while (m.find()) {
            System.out.println(m.group(1));
            System.out.println(m.group(2));
        }
    }
}
<div class="grid-view">
<ul class="cls" id="data-table">
<li>
<div class="img"><a target="_blank" href="http://test.com/abcdefg"> <img src="http://img.mall.test.com/abcd.jpg"> </a></div>
<div class="title">
<a class="ico-b2c" href="http://co.test.com/content/help/2009-08-06/113746233677.html#1" target="_blank" title="测试数据1!"></a><h3><a target="_blank" href="http://book.test.com/">11111111</a></h3>
<a class="xb-icon-small-1 no_name"></a> <a class="xb-icon-small-2 no_name"></a>
</div>
<div class="bot">
价格:<strong>75.<span class="small">00</span></strong>
<a href="#" class="btn-im-online2" account="北京图书出版室"></a><br>
</div>
</li>
<li>
<div class="img"><a target="_blank" href="http://test.com/abcdefg2"> <img src="http://img.mall.test.com/abcd2.jpg"> </a></div>
<div class="title">
<a class="ico-b2c" href="http://co.test.com/content/help/2009-08-06/113746233677.html#1" target="_blank" title="测试数据2!"></a><h3><a target="_blank" href="http://book.test.com/">12312312312</a></h3>
<a class="xb-icon-small-1 no_name"></a> <a class="xb-icon-small-2 no_name"></a>
</div>
<div class="bot">
价格:<strong>255.<span class="small">00</span></strong>
<a href="#" class="btn-im-online2" account="北京图书出版室"></a><br>
</div>
</li>
...(类似的数据)
</ul>
</div>
<div class="grid-view">
<ul class="cls" id="data-table">
<li>
<div class="img"><a target="_blank" href="http://test.com/abcdefg"> <img src="http://img.mall.test.com/abcd.jpg"> </a></div>
<div class="title">
<a class="ico-b2c" href="http://co.test.com/content/help/2009-08-06/113746233677.html#1" target="_blank" title="测试数据1!"></a><h3><a target="_blank" href="http://book.test.com/">沸腾151年</a></h3>
<a class="xb-icon-small-1 no_name"></a> <a class="xb-icon-small-2 no_name"></a>
</div>
<div class="bot">
价格:<strong>75.<span class="small">00</span></strong>
<a href="#" class="btn-im-online2" account="北京图书出版室"></a><br>
</div>
</li>
<li>
<div class="img"><a target="_blank" href="http://test.com/abcdefg2"> <img src="http://img.mall.test.com/abcd2.jpg"> </a></div>
<div class="title">
<a class="ico-b2c" href="http://co.test.com/content/help/2009-08-06/113746233677.html#1" target="_blank" title="测试数据2!"></a><h3><a target="_blank" href="http://book.test.com/">三国演义</a></h3>
<a class="xb-icon-small-1 no_name"></a> <a class="xb-icon-small-2 no_name"></a>
</div>
<div class="bot">
价格:<strong>255.<span class="small">00</span></strong>
<a href="#" class="btn-im-online2" account="北京图书出版室"></a><br>
</div>
</li>
...(类似的数据111)
</ul>
</div>
// Search the sample text for the literal grid-view opening tag and echo the first hit.
String sample = "aaa<div class=\"grid-view\">bbb";
Matcher tagMatcher = Pattern.compile("<div class=\"grid-view\">").matcher(sample);
if (tagMatcher.find()) {
    System.out.print(tagMatcher.group());
}
// The reluctant quantifier .*? stops at the nearest </li>, so every list item
// is reported as its own match instead of one match spanning the whole string.
String listMarkup = "<li>aaa</li><li>bbb</li><li>ccc</li>";
Matcher itemMatcher = Pattern.compile("<li>.*?</li>").matcher(listMarkup);
while (itemMatcher.find()) {
    System.out.println(itemMatcher.group());
}