如何用正则表达式,删除html里面的 css js

yishao 2005-07-10 12:07:01
如何用正则表达式,删除html里面的 css js
...全文
396 8 打赏 收藏 转发到动态 举报
写回复
用AI写文章
8 条回复
切换为时间正序
请发表友善的回复…
发表回复
超级大笨狼 2005-08-06
  • 打赏
  • 举报
回复
上面的文件存成.hta格式,然后你找一个FAQ的帖子另存试验一下。
超级大笨狼 2005-08-06
  • 打赏
  • 举报
回复
参考这个

<button onclick="vbs:gogogo">开始处理</button><br/>

csdnFAQ转换txt格式专用工具:<br/>

<SCRIPT LANGUAGE="vbscript">
'**************************
'*****超级大笨狼***********
'**************************
' Keep running past runtime errors raised by this top-level start-up code.
on error resume next
' Resize the window to the available screen area, then move it to the top-left corner.
window.resizeTo window.screen.availWidth,window.screen.availHeight
window.moveTo 0,0


' File-system object used by the file-scanning routine of this script.
Set fso = CreateObject("Scripting.FileSystemObject")
dim thisFileDir' absolute path of this file
dim thisFileName' name of this file
dim thisFileFolder' path of the folder that contains this file


' One script-wide RegExp object; Global is switched on so Replace affects every match.
dim regEx
set regEx = new RegExp
regEx.Global = true



' Turn the page URL into a Windows path: drop the "file:///" prefix,
' switch "/" to "\" and decode the URL escapes.
thisFileDir = replace(window.location.href,"file:///","")
thisFileDir = unescape(replace(thisFileDir,"/","\"))
thisFileName = LastOne(thisFileDir,"\")
thisFileFolder=getFolderDir(thisFileDir)

' NOTE(review): currentDir is not read anywhere in the visible script - confirm it is still needed.
currentDir = thisFileFolder




function HTMLfilter(str)
    ' Converts one saved FAQ page (HTML text) into plain text.
    '
    ' str     - the HTML source of the page. VBScript passes arguments ByRef by
    '           default, so the caller's variable is modified as well.
    ' Returns - the page text with the header, the markup, the
    '           "主要解答者 ... 查看" section and the site footer removed.
    '
    ' Relies on the script-level regEx object (created with Global = true).
    ' The entity literals below had been decoded into their target characters,
    ' which turned three replacements into no-ops and the &quot; line into an
    ' unterminated string; they are written out as entities again here.

    ' Drop everything from the opening <HTML> tag up to and including "Q :</b>".
    regEx.Pattern = "<HTML>[\w\W]*Q :</b>"
    str = regEx.Replace(str, "")

    ' Line breaks must be converted before the tags are stripped.
    str = replace(str,"<br>" ,vbcrlf)

    ' Strip the remaining tags BEFORE decoding entities, so that text such as
    ' "&lt;b&gt;" survives as "<b>" instead of being deleted as if it were a tag.
    regEx.Pattern = "<[^<>]*>"
    str = regEx.Replace(str, "")

    str = replace(str,"&nbsp;" ," ")
    str = replace(str,"&gt;",">")
    str = replace(str,"&lt;","<")
    str = replace(str,"&quot;",chr(34))
    str = replace(str,vbtab ,"")

    ' Collapse the "main answerer ... view" section into a single line break.
    regEx.Pattern = "[\s]*主要解答者:[\w\W]*查看[\s]*"
    str = regEx.Replace(str, vbcrlf)

    ' Remove the site footer.
    regEx.Pattern = "[\s]*网站简介[\w\W]*All rights reserved[\s]*"
    str = regEx.Replace(str, "")

    ' Trim leading and trailing whitespace, including line breaks.
    regEx.Pattern = "(^\s*)|(\s*$)"
    str = regEx.Replace(str, "")
    HTMLfilter = trim(str)
end function

function getFolderDir(fullDir)
    ' Given a full path, returns the folder part: everything up to and
    ' including the last "\" (the final path segment is cut off).
    dim tailLength
    tailLength = len(LastOne(fullDir,"\"))
    getFolderDir = left(fullDir, len(fullDir) - tailLength)
end function

Function LastOne(Str,splitStr)
    ' Returns the part of Str that follows the last occurrence of splitStr.
    '
    ' Str      - the text to split (for example a path).
    ' splitStr - the separator; it may be longer than one character.
    ' Returns  - the trailing segment, or the whole of Str when the separator
    '            does not occur in it.
    '
    ' The previous arithmetic assumed a one-character separator and returned
    ' part of the separator itself for longer ones.
    Dim sepPos, sepLen
    sepLen = Len(splitStr)
    ' Treat an empty separator as width 1 so the result for that input stays
    ' what the one-character arithmetic produced.
    If sepLen = 0 Then sepLen = 1
    sepPos = InStrRev(Str, splitStr)
    If sepPos = 0 Then
        LastOne = Str
    Else
        LastOne = Mid(Str, sepPos + sepLen)
    End If
End Function

sub seachFile(theFolder)
    ' Converts every *.htm file in theFolder, and recursively in its
    ' sub-folders, to a text file produced by HTMLfilter.
    '
    ' theFolder - path of the folder to scan.
    '
    ' Both text streams are now closed explicitly; previously neither the
    ' reader nor the writer was ever closed.
    dim folderObj, fileObj, subFolder, reader, writer, content
    set folderObj = fso.GetFolder(theFolder)

    for each fileObj in folderObj.Files
        if lcase(right(fileObj.name,4))=".htm" then
            ' 1 = ForReading; the file is known to exist, so do not ask for creation.
            set reader = fso.OpenTextFile(fileObj.Path,1,false)
            if reader.AtEndOfStream then
                ' Empty file: nothing to convert.
                reader.Close
            else
                ' Read the whole file in one go.
                content = reader.readAll
                reader.Close

                ' 2 = ForWriting, create the target when missing.
                ' NOTE(review): the target name is relative, so it resolves
                ' against the current directory rather than theFolder -
                ' confirm that collecting all output in one place is intended.
                set writer = fso.OpenTextFile(getName(fileObj.name),2,true)
                writer.write HTMLfilter(content)
                writer.Close
            end if
        end if
    next

    for each subFolder in folderObj.SubFolders
        seachFile subFolder.Path
    next
end sub

sub gogogo()
    ' Click handler of the start button: process the folder this file lives
    ' in, then tell the user that processing has finished.
    call seachFile(thisFileFolder)
    alert "处理完毕"
end sub

function getName(x)
    ' Builds the output file name for a source file name.
    '
    ' x       - the source file name, e.g. "topic?.htm".
    ' Returns - x with the characters / \ : * ? " | < > ' removed and ".htm"
    '           replaced by ".txt".
    '
    ' The extension swap is case-insensitive. The caller selects files by a
    ' case-insensitive test, so with a case-sensitive swap "X.HTM" kept its
    ' name and the converted text could be written over the source file.
    dim Arr,a,newName
    newName=x
    Arr=array("/","\",":","*","?",chr(34),"|","<",">",chr(39))
    for each a in Arr
        newName=replace(newName,a,"")
    next
    getName=replace(newName,".htm",".txt",1,-1,vbTextCompare)
end function
</script>
wztgq 2005-08-06
  • 打赏
  • 举报
回复
下面这个测试过了!
<?php
// Demo: strip <script> and <style> elements (with their contents) from HTML.
$html = <<<EOT
<script>
alert("js1");
</script>
TEst...
style /style
script /script
<script language="javascript" >
alert("js2");
</script>
<style type="text/css">
body{font-size:9pt;background-color:#dddddd}
</style>
EOT;

// Modifiers: U = ungreedy, s = "." also matches newlines, i = case-insensitive.
// The patterns are applied in order: scripts first, then styles.
$patterns = array(
    "#\<script.*\>.*\<\/script\>#Usi",
    "#\<style.*\>.*\<\/style\>#Usi",
);
echo preg_replace($patterns, "", $html);
?>
//输出为
TEst...
style /style
script /script

wztgq 2005-08-06
  • 打赏
  • 举报
回复
prel
wztgq 2005-08-06
  • 打赏
  • 举报
回复
preg里的正则,不知道是否正确。没测试
#\<script\s*(\w+\=\w+)?\s*\>.*\<\/script\>#Usi
#\<style\s*(\w+\=\w+)?\s*\>.*\<\/style\>#Usi
yishao 2005-08-06
  • 打赏
  • 举报
回复
去除不了

<%
' Fetch a page and print it (HTML-encoded) with its CSS and JavaScript removed.
url ="http://www.21cn.com"

content=GetBody(url)
' Sample input for testing without a network request:
'content="<html><head><title>test</title><style>ssss</style><script language='JavaScript'>function checkData(){alert('test');}</script></head><body>content</body></html>"

' Remove CSS. [^>]* accepts attributes on the opening tag and [\s\S]*? spans
' line breaks ("." does not match a newline in VBScript regular expressions).
patrn="<style\b[^>]*>[\s\S]*?</style\s*>"
content=RegExp_Replace(patrn,content,"")

' Remove JavaScript. The earlier pattern started with "\script", which lost
' the "<" and made "\s" mean "one whitespace character".
' NOTE(review): whether every block or only the first one is removed depends
' on RegExp_Replace enabling Global - verify.
patrn="<script\b[^>]*>[\s\S]*?</script\s*>"
content=RegExp_Replace(patrn,content,"")

Response.Write Server.HTMLEncode(content)

Function RegExp_Replace(patrn,str,replStr)
    ' Replaces every match of a regular expression in a string.
    '
    ' patrn   - the regular-expression pattern.
    ' str     - the text to search.
    ' replStr - the replacement text.
    ' Returns - str with all matches replaced; matching ignores case.
    Dim regEx
    Set regEx = New RegExp
    regEx.Pattern = patrn
    regEx.IgnoreCase = True
    ' Without Global only the first match is replaced, which leaves every
    ' later <style>/<script> block in the page.
    regEx.Global = True
    RegExp_Replace = regEx.Replace(str,replStr)
    Set regEx = Nothing
End Function

Function GetBody(Url)
    ' Downloads Url with a synchronous GET request and returns the response
    ' body as text decoded from GB2312.
    '
    ' Url     - the address to fetch.
    ' Returns - the decoded page text, or "" when the request or the decoding
    '           fails. Previously a failure could leave the raw byte array or
    '           Empty as the return value.
    Dim objXML, rawBytes
    GetBody = ""

    ' Best-effort download: errors are checked below rather than raised.
    On Error Resume Next
    Set objXML = CreateObject("Microsoft.XMLHTTP")
    objXML.Open "Get", Url, False, "", ""
    objXML.Send
    If Err.Number = 0 Then
        rawBytes = objXML.ResponseBody
    End If
    If Err.Number = 0 Then
        GetBody = BytesToBstr(rawBytes,"GB2312")
    End If
    If Err.Number <> 0 Then
        GetBody = ""
        Err.Clear
    End If
    Set objXML = Nothing
End Function
' Uses Adodb.Stream to handle the binary data.
Function BytesToBstr(strBody,CodeBase)
    ' Converts a byte array to text.
    '
    ' strBody  - the binary data, written to the stream as-is.
    ' CodeBase - the character set name used for decoding, e.g. "GB2312".
    ' Returns  - the text read back from the stream.
    Const STREAM_BINARY = 1
    Const STREAM_TEXT = 2
    Const MODE_READWRITE = 3

    Dim stm
    Set stm = Server.CreateObject("Adodb.Stream")
    With stm
        ' Write the bytes in binary mode ...
        .Type = STREAM_BINARY
        .Mode = MODE_READWRITE
        .Open
        .Write strBody
        ' ... then rewind and read them back as text in the given charset.
        .Position = 0
        .Type = STREAM_TEXT
        .Charset = CodeBase
        BytesToBstr = .ReadText
        .Close
    End With
    Set stm = Nothing
End Function
%>
yishao 2005-08-06
  • 打赏
  • 举报
回复
to superdullwolf vb?
goodstuday 2005-07-10
  • 打赏
  • 举报
回复
' Sample page used to demonstrate the removal of CSS and JavaScript.
content="<html><head><title>test</title><style>ssss</style><script language='JavaScript'>function checkData(){alert('test');}</script></head><body>content</body></html>"

' Remove CSS. [^>]* accepts attributes on the opening tag and [\s\S]*? spans
' line breaks ("." does not match a newline in VBScript regular expressions),
' so the pattern also works on real pages, not only on this one-line sample.
patrn="<style\b[^>]*>[\s\S]*?</style\s*>"
content=RegExp_Replace(patrn,content,"")

' Remove JavaScript, with the same allowances as above.
patrn="<script\b[^>]*>[\s\S]*?</script\s*>"
content=RegExp_Replace(patrn,content,"")

Response.Write(Server.HTMLEncode(content))

Function RegExp_Replace(patrn,str,replStr)
    ' Replaces every match of a regular expression in a string.
    '
    ' patrn   - the regular-expression pattern.
    ' str     - the text to search.
    ' replStr - the replacement text.
    ' Returns - str with all matches replaced; matching ignores case.
    Dim regEx
    Set regEx = New RegExp
    regEx.Pattern = patrn
    regEx.IgnoreCase = True
    ' Without Global only the first match is replaced, which leaves every
    ' later <style>/<script> block in the page.
    regEx.Global = True
    RegExp_Replace = regEx.Replace(str,replStr)
    Set regEx = Nothing
End Function

28,405

社区成员

发帖
与我相关
我的任务
社区描述
ASP即Active Server Pages,是Microsoft公司开发的服务器端脚本环境。
社区管理员
  • ASP
  • 无·法
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧