求助各位大神,我想截取这两个页面中的中文内容, 请问怎么用一个正则提取出来?

wonota 2017-08-31 11:50:39
页面1

<script src="//exp.bdstatic.com/static/article/umeditor/dialogs/formula/formula.js" type="text/javascript" defer="defer">
!function(){var t=null;UM.registerWidget("formula",{tpl:'<link type="text/css" rel="stylesheet" href="<%=formula_url%>formula.css"><div class="edui-formula-wrapper"><ul class="edui-tab-nav"></ul><div class="edui-tab-content"></div></div>',sourceData:{formula:{common:["{/}frac{ }{ }","^{ }/_{ }","x^{ }","x_{ }","x^{ }_{ }","{/}bar{ }","{/}sqrt{ }","{/}nthroot{ }{ }","{/}sum^{ }_{n=}","{/}sum","{/}log_{ }","{/}ln","{/}int_{ }^{ }","{/}oint_{ }^{ }"],symbol:["+","-","{/}pm","{/}times","{/}ast","{/}div","/","{/}bigtriangleup","=","{/}ne","{/}approx",">","<","{/}ge","{/}le","{/}infty","{/}cap","{/}cup","{/}because","{/}therefore","{/}subset","{/}supset","{/}subseteq","{/}supseteq","{/}nsubseteq","{/}nsupseteq","{/}in","{/}ni","{/}notin","{/}mapsto","{/}leftarrow","{/}rightarrow","{/}Leftarrow","{/}Rightarrow","{/}leftrightarrow","{/}Leftrightarrow"],letter:["{/}alpha","{/}beta","{/}gamma","{/}delta","{/}varepsilon","{/}varphi","{/}lambda","{/}mu","{/}rho","{/}sigma","{/}omega","{/}Gamma","{/}Delta","{/}Theta","{/}Lambda","{/}Xi","{/}Pi","{/}Sigma","{/}Upsilon","{/}Phi","{/}Psi","{/}Omega"]}},initContent:function(e,a){var i=this,n=i.sourceData.formula,r=e.getLang("formula").static,u=UMEDITOR_CONFIG.UMEDITOR_HOME_URL+"dialogs/formula/",o=$.extend({},r,{formula_url:u}),s=i.root();if(i.inited)return void i.preventDefault();i.inited=!0,t=e,i.$widget=a,s.html($.parseTmpl(i.tpl,o)),i.tabs=$.eduitab({selector:"#edui-formula-tab-Jpanel"});var l=[],d=0,c=0,m=i.root().find(".edui-tab-content");$.each(n,function(t,e){var a=[];$.each(e,function(t,e){a.push('<li class="edui-formula-latex-item" data-latex="'+e+'" style="background-position:-'+30*d+"px -"+30*c+'px"></li>'),++d>=8&&(++c,d=0)}),c++,d=0,m.append('<div class="edui-tab-pane"><ul>'+a.join("")+"</ul>"),l.push('<li class="edui-tab-item"><a href="javascript:void(0);" class="edui-tab-text">'+r["lang_tab_"+t]+"</a></li>")}),l.push('<li 
class="edui-formula-clearboth"></li>'),s.find(".edui-tab-nav").html(l.join("")),s.find(".edui-tab-content").append('<div class="edui-formula-clearboth"></div>'),i.switchTab(0)},initEvent:function(){var t=this;t.root().on("click",function(){return!1}),t.root().find(".edui-tab-nav").delegate(".edui-tab-item","click",function(){return t.switchTab(this),!1}),t.root().find(".edui-tab-pane").delegate(".edui-formula-latex-item","click",function(){var e=$(this),a=e.attr("data-latex")||"";return a&&t.insertLatex(a.replace("{/}","\\")),t.$widget.edui().hide(),!1})},switchTab:function(t){var e=this,a=e.root(),t=$.isNumeric(t)?t:$.inArray(t,a.find(".edui-tab-nav .edui-tab-item"));a.find(".edui-tab-nav .edui-tab-item").removeClass("edui-active").eq(t).addClass("edui-active"),a.find(".edui-tab-content .edui-tab-pane").removeClass("edui-active").eq(t).addClass("edui-active"),e.autoHeight(0)},autoHeight:function(){this.$widget.height(this.root()+2)},insertLatex:function(e){t.execCommand("formula",e)},width:350,height:400})}();
</script>
<div class="exp-content-listblock">
<div class="content-listblock-text">
<p> 谷歌和百度,哪一个更优秀,哪一个更好用?其实这根本无法比较出来,因为两者的侧重点不同,擅长不同的领域。就我个人的感受而言,谷歌似乎更注重结果的精确性,而百度注重的是搜索结果的实用性,两个引擎我都在用。</p>
<p> 那么我们通过一些实际的例子来对比吧:</p>
<p></p>
<p>
<p> <span>可以看到,百度搜索结果更加人性化</span>,在易用性方面是可圈可点的;而谷歌更注重搜索结果的精确性。本人一直从事软件开发工作,就我的经验来说,对于技术方面的问题,谷歌的搜索结果能帮你更快的解决问题;而百度在日常生活方面的搜索也是谷歌目前无法比拟的。</p>
<p> <div id="aa">因此,如果你是一个普通人,百度</div>能帮你更多;如果,你喜欢钻研技术,你应该选择谷歌。</p>
<p></p>
</p>
</div>



页面2

<dd>
<div class="adc" style="width: 728px; margin: 0px auto 5px;">
<iframe class="rbeyhsgkrkvfgmljxosi" scrolling="no" src="/js/html/article_925_180.html" width="728" height="90" frameborder="0"></iframe>
</div>
<p style="TEXT-INDENT: 2em">
<span style="COLOR: #ffc000"></span>
安卓手机不能安装软件其实有很多的原因,排除可以避免的原因(google授权),其他的硬件设施的支持,也可能出现软件不能安装的现象。有的是可以解决的,但有的是强制性原因,那就没有办法了。下面就来分析下软件不能安装的几个原因。
</p>
<p style="TEXT-INDENT: 2em">
<strong>1.最常见的就是,未知软体安装失败。</strong>
</p>
<p style="TEXT-INDENT: 2em">这个其实在塞班手机的时候就有这个权限,这个也只要在手机安全设置里面设置就可以了。这也是运营商为了保证用户不过乱的安装非官方应用程序而造成手机系统紊乱的现象而添加的一个选项。</p>
<p style="TEXT-INDENT: 2em">
解决办法:
<strong>设置安装权限即可(如图)</strong>
</p>
</dd>
...全文
173 3 打赏 收藏 转发到动态 举报
写回复
用AI写文章
3 条回复
切换为时间正序
请发表友善的回复…
发表回复
hongmei85 2017-09-03
  • 打赏
  • 举报
回复

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>无标题文档</title>
</head>
 
<body>
 
<textarea name="" cols="100" rows="30" id="kkk">
<script src="//exp.bdstatic.com/static/article/umeditor/dialogs/formula/formula.js" type="text/javascript" defer="defer">
!function(){var t=null;UM.registerWidget("formula",{tpl:'<link type="text/css" rel="stylesheet" href="<%=formula_url%>formula.css"><div class="edui-formula-wrapper"><ul class="edui-tab-nav"></ul><div class="edui-tab-content"></div></div>',sourceData:{formula:{common:["{/}frac{ }{ }","^{ }/_{ }","x^{ }","x_{ }","x^{ }_{ }","{/}bar{ }","{/}sqrt{ }","{/}nthroot{ }{ }","{/}sum^{ }_{n=}","{/}sum","{/}log_{ }","{/}ln","{/}int_{ }^{ }","{/}oint_{ }^{ }"],symbol:["+","-","{/}pm","{/}times","{/}ast","{/}div","/","{/}bigtriangleup","=","{/}ne","{/}approx",">","<","{/}ge","{/}le","{/}infty","{/}cap","{/}cup","{/}because","{/}therefore","{/}subset","{/}supset","{/}subseteq","{/}supseteq","{/}nsubseteq","{/}nsupseteq","{/}in","{/}ni","{/}notin","{/}mapsto","{/}leftarrow","{/}rightarrow","{/}Leftarrow","{/}Rightarrow","{/}leftrightarrow","{/}Leftrightarrow"],letter:["{/}alpha","{/}beta","{/}gamma","{/}delta","{/}varepsilon","{/}varphi","{/}lambda","{/}mu","{/}rho","{/}sigma","{/}omega","{/}Gamma","{/}Delta","{/}Theta","{/}Lambda","{/}Xi","{/}Pi","{/}Sigma","{/}Upsilon","{/}Phi","{/}Psi","{/}Omega"]}},initContent:function(e,a){var i=this,n=i.sourceData.formula,r=e.getLang("formula").static,u=UMEDITOR_CONFIG.UMEDITOR_HOME_URL+"dialogs/formula/",o=$.extend({},r,{formula_url:u}),s=i.root();if(i.inited)return void i.preventDefault();i.inited=!0,t=e,i.$widget=a,s.html($.parseTmpl(i.tpl,o)),i.tabs=$.eduitab({selector:"#edui-formula-tab-Jpanel"});var l=[],d=0,c=0,m=i.root().find(".edui-tab-content");$.each(n,function(t,e){var a=[];$.each(e,function(t,e){a.push('<li class="edui-formula-latex-item" data-latex="'+e+'" style="background-position:-'+30*d+"px -"+30*c+'px"></li>'),++d>=8&&(++c,d=0)}),c++,d=0,m.append('<div class="edui-tab-pane"><ul>'+a.join("")+"</ul>"),l.push('<li class="edui-tab-item"><a href="javascript:void(0);" class="edui-tab-text">'+r["lang_tab_"+t]+"</a></li>")}),l.push('<li 
class="edui-formula-clearboth"></li>'),s.find(".edui-tab-nav").html(l.join("")),s.find(".edui-tab-content").append('<div class="edui-formula-clearboth"></div>'),i.switchTab(0)},initEvent:function(){var t=this;t.root().on("click",function(){return!1}),t.root().find(".edui-tab-nav").delegate(".edui-tab-item","click",function(){return t.switchTab(this),!1}),t.root().find(".edui-tab-pane").delegate(".edui-formula-latex-item","click",function(){var e=$(this),a=e.attr("data-latex")||"";return a&&t.insertLatex(a.replace("{/}","\\")),t.$widget.edui().hide(),!1})},switchTab:function(t){var e=this,a=e.root(),t=$.isNumeric(t)?t:$.inArray(t,a.find(".edui-tab-nav .edui-tab-item"));a.find(".edui-tab-nav .edui-tab-item").removeClass("edui-active").eq(t).addClass("edui-active"),a.find(".edui-tab-content .edui-tab-pane").removeClass("edui-active").eq(t).addClass("edui-active"),e.autoHeight(0)},autoHeight:function(){this.$widget.height(this.root()+2)},insertLatex:function(e){t.execCommand("formula",e)},width:350,height:400})}();
</script>
<div class="exp-content-listblock">
<div class="content-listblock-text">
<p>    谷歌和百度,哪一个更优秀,哪一个更好用?其实这根本无法比较出来,因为两者的侧重点不同,擅长不同的领域。就我个人的感受而言,谷歌似乎更注重结果的精确性,而百度注重的是搜索结果的实用性,两个引擎我都在用。</p>
<p>    那么我们通过一些实际的例子来对比吧:</p>
<p></p>
<p>
<p>    <span>可以看到,百度搜索结果更加人性化</span>,在易用性方面是可圈可点的;而谷歌更注重搜索结果的精确性。本人一直从事软件开发工作,就我的经验来说,对于技术方面的问题,谷歌的搜索结果能帮你更快的解决问题;而百度在日常生活方面的搜索也是谷歌目前无法比拟的。</p>
<p>    <div id="aa">因此,如果你是一个普通人,百度</div>能帮你更多;如果,你喜欢钻研技术,你应该选择谷歌。</p>
<p></p>
</p>
</div>
 
<div>
广告
</div>
</textarea>
<input type="button" value="替换" onclick="aa()"/>
 
<script type="text/javascript">
/**
 * Click handler for the "替换" button.
 *
 * Reads the raw page source from the textarea with id "kkk" and shows two
 * popups:
 *   1. the source with every character in the range \x00-\xff removed;
 *   2. the text between the content container's opening tag and the first
 *      "广告" marker that follows it, with HTML tags stripped.
 *
 * If the content container is not found, the second popup is empty. If the
 * "广告" marker is not found, the extraction runs to the end of the source.
 */
function aa()
{
    var START_MARKER = '<div class="content-listblock-text">';
    var END_MARKER = '广告';

    var k = document.getElementById("kkk").value;

    // Popup 1: drop everything in \x00-\xff; wider characters such as CJK
    // text and CJK punctuation are kept.
    alert(k.replace(/[\x00-\xff]/gi,""));

    var i = k.indexOf(START_MARKER);
    if (i === -1) {
        // No content container: there is nothing meaningful to extract.
        alert("");
        return;
    }

    var j = k.indexOf(END_MARKER, i);
    if (j === -1) {
        // substring() swaps its arguments when the end is smaller than the
        // start, so a -1 here would return the text *before* the container.
        j = k.length;
    }

    // [^>]+ (rather than .+?) also matches tags whose attributes wrap onto
    // the next line, because "." never matches a line break.
    alert(k.substring(i, j).replace(/<[^>]+>/g, ""));
}
</script>
 
</body>
</html>
wonota 2017-09-03
  • 打赏
  • 举报
回复
引用 1 楼 usecf 的回复:
<html> <head> <meta charset="utf-8"/> <script type="text/javascript"> var str="怎样从一个Html页面中提取所有汉字呢?不能有其它Html代码。"; alert(str.replace(/[^\u4e00-\u9fa5]/gi,"")); </script> </head> </html>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>无标题文档</title>
</head>

<body>

<textarea name="" cols="100" rows="30" id="kkk">
<script src="//exp.bdstatic.com/static/article/umeditor/dialogs/formula/formula.js" type="text/javascript" defer="defer">
!function(){var t=null;UM.registerWidget("formula",{tpl:'<link type="text/css" rel="stylesheet" href="<%=formula_url%>formula.css"><div class="edui-formula-wrapper"><ul class="edui-tab-nav"></ul><div class="edui-tab-content"></div></div>',sourceData:{formula:{common:["{/}frac{ }{ }","^{ }/_{ }","x^{ }","x_{ }","x^{ }_{ }","{/}bar{ }","{/}sqrt{ }","{/}nthroot{ }{ }","{/}sum^{ }_{n=}","{/}sum","{/}log_{ }","{/}ln","{/}int_{ }^{ }","{/}oint_{ }^{ }"],symbol:["+","-","{/}pm","{/}times","{/}ast","{/}div","/","{/}bigtriangleup","=","{/}ne","{/}approx",">","<","{/}ge","{/}le","{/}infty","{/}cap","{/}cup","{/}because","{/}therefore","{/}subset","{/}supset","{/}subseteq","{/}supseteq","{/}nsubseteq","{/}nsupseteq","{/}in","{/}ni","{/}notin","{/}mapsto","{/}leftarrow","{/}rightarrow","{/}Leftarrow","{/}Rightarrow","{/}leftrightarrow","{/}Leftrightarrow"],letter:["{/}alpha","{/}beta","{/}gamma","{/}delta","{/}varepsilon","{/}varphi","{/}lambda","{/}mu","{/}rho","{/}sigma","{/}omega","{/}Gamma","{/}Delta","{/}Theta","{/}Lambda","{/}Xi","{/}Pi","{/}Sigma","{/}Upsilon","{/}Phi","{/}Psi","{/}Omega"]}},initContent:function(e,a){var i=this,n=i.sourceData.formula,r=e.getLang("formula").static,u=UMEDITOR_CONFIG.UMEDITOR_HOME_URL+"dialogs/formula/",o=$.extend({},r,{formula_url:u}),s=i.root();if(i.inited)return void i.preventDefault();i.inited=!0,t=e,i.$widget=a,s.html($.parseTmpl(i.tpl,o)),i.tabs=$.eduitab({selector:"#edui-formula-tab-Jpanel"});var l=[],d=0,c=0,m=i.root().find(".edui-tab-content");$.each(n,function(t,e){var a=[];$.each(e,function(t,e){a.push('<li class="edui-formula-latex-item" data-latex="'+e+'" style="background-position:-'+30*d+"px -"+30*c+'px"></li>'),++d>=8&&(++c,d=0)}),c++,d=0,m.append('<div class="edui-tab-pane"><ul>'+a.join("")+"</ul>"),l.push('<li class="edui-tab-item"><a href="javascript:void(0);" class="edui-tab-text">'+r["lang_tab_"+t]+"</a></li>")}),l.push('<li 
class="edui-formula-clearboth"></li>'),s.find(".edui-tab-nav").html(l.join("")),s.find(".edui-tab-content").append('<div class="edui-formula-clearboth"></div>'),i.switchTab(0)},initEvent:function(){var t=this;t.root().on("click",function(){return!1}),t.root().find(".edui-tab-nav").delegate(".edui-tab-item","click",function(){return t.switchTab(this),!1}),t.root().find(".edui-tab-pane").delegate(".edui-formula-latex-item","click",function(){var e=$(this),a=e.attr("data-latex")||"";return a&&t.insertLatex(a.replace("{/}","\\")),t.$widget.edui().hide(),!1})},switchTab:function(t){var e=this,a=e.root(),t=$.isNumeric(t)?t:$.inArray(t,a.find(".edui-tab-nav .edui-tab-item"));a.find(".edui-tab-nav .edui-tab-item").removeClass("edui-active").eq(t).addClass("edui-active"),a.find(".edui-tab-content .edui-tab-pane").removeClass("edui-active").eq(t).addClass("edui-active"),e.autoHeight(0)},autoHeight:function(){this.$widget.height(this.root()+2)},insertLatex:function(e){t.execCommand("formula",e)},width:350,height:400})}();
</script>
<div class="exp-content-listblock">
<div class="content-listblock-text">
<p>    谷歌和百度,哪一个更优秀,哪一个更好用?其实这根本无法比较出来,因为两者的侧重点不同,擅长不同的领域。就我个人的感受而言,谷歌似乎更注重结果的精确性,而百度注重的是搜索结果的实用性,两个引擎我都在用。</p>
<p>    那么我们通过一些实际的例子来对比吧:</p>
<p></p>
<p>
<p>    <span>可以看到,百度搜索结果更加人性化</span>,在易用性方面是可圈可点的;而谷歌更注重搜索结果的精确性。本人一直从事软件开发工作,就我的经验来说,对于技术方面的问题,谷歌的搜索结果能帮你更快的解决问题;而百度在日常生活方面的搜索也是谷歌目前无法比拟的。</p>
<p>    <div id="aa">因此,如果你是一个普通人,百度</div>能帮你更多;如果,你喜欢钻研技术,你应该选择谷歌。</p>
<p></p>
</p>
</div>

<div>
广告
</div>
</textarea>
<input type="button" value="替换" onclick="aa()"/>

<script type="text/javascript">
/**
 * Click handler for the "替换" button.
 *
 * Takes the innerHTML of the element with id "kkk" and shows, in a popup,
 * only the characters in the range U+4E00-U+9FA5. Everything else
 * (markup, Latin text, digits, whitespace and punctuation) is removed.
 */
function aa()
{
	// Negated class: any character outside U+4E00-U+9FA5 is deleted.
	var nonHanPattern = /[^\u4e00-\u9fa5]/gi;
	var sourceBox = document.getElementById("kkk");
	var hanOnly = sourceBox.innerHTML.replace(nonHanPattern, "");
	alert(hanOnly);
}
</script>

</body>
</html>

这个把所有的标点符号都去掉了,我想保留文字中的标点,而且我只想提取有用的文字,一些广告文字不想要,请问如何来实现?
usecf 2017-08-31
  • 打赏
  • 举报
回复
<html> <head> <meta charset="utf-8"/> <script type="text/javascript"> var str="怎样从一个Html页面中提取所有汉字呢?不能有其它Html代码。"; alert(str.replace(/[^\u4e00-\u9fa5]/gi,"")); </script> </head> </html>

87,910

社区成员

发帖
与我相关
我的任务
社区描述
Web 开发 JavaScript
社区管理员
  • JavaScript
  • 无·法
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧