抓取回来的网页,如何把其中的文本提取出来?
例如 http://www.google.com/intl/zh-CN/ 这个页面,
应该如何提取出它的文本?(抓回来的 HTML 源码如下)
<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"><title>Google</title><style><!--
body,td,a,p,.h{font-family:arial,sans-serif;}
.h{font-size: 20px;}
.q{color:#0000cc;}
//-->
</style>
<script>
<!--
/**
 * Puts keyboard focus on the search box, i.e. the control named "q" inside
 * the form named "f". Wired to the body's onLoad handler.
 */
function sf() {
  var queryBox = document.f.q;
  queryBox.focus();
}
/**
 * Reports a click on a link by requesting a "/url?sa=T&..." address through a
 * throwaway Image object, then lets the click continue.
 *
 * Nothing is requested when the browser has no document.images collection.
 *
 * @param {HTMLAnchorElement} el  The clicked link; its href is sent as the "url" parameter.
 * @param {string} ct  Opaque tracking value sent as the "ct" parameter.
 * @param {string} cd  Opaque tracking value sent as the "cd" parameter.
 * @param {string} sg  Text appended verbatim to the end of the request URL
 *                     (presumably empty or an already formatted "&name=value"
 *                     suffix; confirm against callers).
 * @returns {boolean} Always true, so an onmousedown="return clk(...)" handler
 *                    never cancels the event.
 */
function clk(el, ct, cd, sg) {
  if (document.images) {
    // encodeURIComponent produces standard UTF-8 percent-encoding and already
    // encodes "+". The deprecated escape() emits non-standard %uXXXX for
    // non-ASCII characters and leaves "+" alone, so it is kept only as a
    // fallback for engines that predate encodeURIComponent.
    var enc = (typeof encodeURIComponent == "function")
      ? encodeURIComponent
      : function (text) { return escape(text).replace(/\+/g, "%2B"); };
    (new Image()).src = "/url?sa=T&ct=" + enc(ct) +
      "&cd=" + enc(cd) +
      "&url=" + enc(el.href) +
      "&ei=t5W-QtX1K6maYMvgpMoN" + sg;
  }
  return true;
}
/**
 * Stores the current browser width in the form's "biw" field just before the
 * form is submitted (called from the form's onsubmit handler).
 *
 * Browsers that identify themselves as "Netscape" supply self.innerWidth;
 * every other browser supplies document.body.clientWidth.
 *
 * @param {HTMLFormElement} f  The form being submitted; must contain a control named "biw".
 */
function rbi(f) {
  var reportsAsNetscape = navigator.appName == "Netscape";
  f.biw.value = reportsAsNetscape ? self.innerWidth : document.body.clientWidth;
}
// -->
</script>
</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf() topmargin=3 marginheight=3><center><img src="/intl/zh-CN_ALL/images/logo.gif" width=286 height=110 alt="Google"><br><br>
<form action=/search name=f onsubmit="rbi(this);"><script><!--
/**
 * Carries the text currently typed in the search box over to the link that is
 * being clicked, by rewriting the link's "q" query parameter.
 *
 * An existing "q" parameter is replaced with the URI-encoded value of
 * document.f.q; otherwise "q" is appended to the query string. Any
 * "#fragment" on the link is preserved after the query string. Browsers
 * lacking RegExp or encodeURIComponent leave the link untouched.
 *
 * @param {HTMLAnchorElement} el  The link being clicked; its href is updated in place.
 * @returns {number} Always 1, so onClick="return qs(this);" never cancels the click.
 */
function qs(el) {
  if (window.RegExp && window.encodeURIComponent) {
    var href = el.href;
    var term = encodeURIComponent(document.f.q.value);

    // Set any "#fragment" aside so the parameter lands in the query string
    // instead of inside, or in place of, the fragment.
    var fragment = "";
    var hashAt = href.indexOf("#");
    if (hashAt != -1) {
      fragment = href.substring(hashAt);
      href = href.substring(0, hashAt);
    }

    // Only a parameter named exactly "q" counts: it has to follow "?" or "&".
    // Searching for a bare "q=" would also hit names such as "oq=".
    var qParam = new RegExp("([?&])q=[^&]*");
    if (qParam.test(href)) {
      // term never contains "$" (encodeURIComponent escapes it as %24), so
      // "$1" is the only replacement pattern in this string.
      href = href.replace(qParam, "$1q=" + term);
    } else {
      // Start a query string when the link does not have one yet.
      href += (href.indexOf("?") == -1 ? "?" : "&") + "q=" + term;
    }
    el.href = href + fragment;
  }
  return 1;
}
// -->
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>网页</b> <a id=1a class=q href="/imghp?hl=zh-CN&tab=wi" onClick="return qs(this);">图片</a> <a id=2a class=q href="/nwshp?hl=zh-CN&tab=wn" onClick="return qs(this);">新闻</a> <a id=3a class=q href="/grphp?hl=zh-CN&tab=wg" onClick="return qs(this);">论坛</a> <a id=4a class=q href="/dirhp?hl=zh-CN&tab=wd" onClick="return qs(this);">网页目录</a> <b><a href="/intl/zh-CN/options/" class=q>更多 »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=biw value=""><input type=hidden name=hl value=zh-CN><input maxLength=256 size=55 name=q value=""><br><input type=submit value="Google 搜索" name=btnG><input type=submit value="手气不错" name=btnI></td><td valign=top nowrap width=25%><font size=-1> <a href=/advanced_search?hl=zh-CN>高级搜索</a><br> <a href=/preferences?hl=zh-CN>使用偏好</a><br> <a href=/language_tools?hl=zh-CN>语言工具</a></font></td></tr><tr><td colspan=3 align=center><font size=-1><input id=all type=radio name=lr value="" checked><label for=all>搜索所有网页</label><input id=ch type=radio name=lr value=lang_zh-CN|lang_zh-TW><label for=ch>搜索所有中文网页</label><input id=il type=radio name=lr value=lang_zh-CN><label for=il>搜索简体中文网页</label></font></td></tr></table></form><br><font size=-1><font color=#ff0000>最新!</font> <a href="http://www.google.com/intl/zh-CN/options/universities.html" onmousedown="return clk(this,'pro','hppweb:zh-cn_all','')">Google 大学搜索</a>: 查找大学信息,直接用 Google 搜索各大学网站</font><br><br><br><font size=-1><a href=/intl/zh-CN/ads/>广告计划</a> - <a href=/intl/zh-CN/about.html>Google 大全</a> - <a href=http://www.google.com/ncr>Google.com in English</a><span id=hp style="behavior:url(#default#homepage)"></span>
<script>
//<!--
// Writes a "make Google your home page" link into the page unless the
// homepage behavior attached to the element with id "hp" reports that
// http://www.google.com/ is already the home page.
(function () {
  if (hp.isHomePage('http://www.google.com/')) {
    return;
  }
  document.write("<p><a href=\"/mgyhp.html\" onClick=\"style.behavior='url(#default#homepage)';setHomePage('http://www.google.com/');\">将 Google 设为首页!</a>");
})();
//-->
</script></font><p><font size=-1>©2005 Google - 搜索 8,058,044,651 张网页</font></p></center></body></html>