去掉所有HTML标记
Function FilterHTML(str,strlen)
Dim re
Set re=new RegExp
re.IgnoreCase =True
re.Global=True
re.Pattern="<(.[^>]*)>"
str=re.Replace(str,"")
set re=Nothing
Dim l,t,c,i
l=Len(str)
t=0
For i=1 to l
c=Abs(Asc(Mid(str,i,1)))
If c>255 Then
t=t+2
Else
t=t+1
End If
If t>=strlen Then
cutStr=left(str,i)&"..."
Exit For
Else
cutStr=str
End If
Next
cutStr=Replace(cutStr,chr(10),"")
cutStr=Replace(cutStr,chr(13),"")
End Function
function delhtmlcode(str,inum)
'str="<dsfd>aaaa<sdfa>sdfsard<sdfasf>dfsfs"
dim conversion_str,icur'conversion_str为取出的不是html的字符串,icurstart为"<"的开始位置,icurend为">"的开始位置
conversion_str=""
str=str
for i=1 to len(str)
icurstart=instr(i,str,"<")
if cint(icurstart)=0 then
conversion_str=conversion_str & mid(str,i)'在此处<><
i=len(str)
else
conversion_str=conversion_str&mid(str,i,icurstart-i)'取出<前面的字符串
icurend=instr(icurstart,str,">")
if cint(icurend)=0 then
conversion_str=conversion_str&mid(str,icurstart)
i=len(str)
else'如果查到<>
i=icurend
end if
end if
next
'response.write conversion_str
conversion_str=replace(conversion_str," ","")
if inum=0 then
delhtmlcode=conversion_str
else
delhtmlcode=addleft(conversion_str,inum)
end if
end function
function addleft(content,num)
if num>len(content) then
addleft=content
else
addleft=left(content,num)&"..."
end if
end function
regEx.Pattern = "[\s]*网站简介[\w\W]*All rights reserved[\s]*"
str = regEx.Replace(str, "")
regEx.Pattern = "(^\s*)|(\s*$)"
str = regEx.Replace(str, "")
HTMLfilter = trim(str)
end function
function getFolderDir(fullDir)
'输入得到全路径,得到文件夹路径
s=LastOne(fullDir,"\")
getFolderDir = left(fullDir,len(fullDir)-len(s))
end function
Function LastOne(Str,splitStr)
'输入字符和分隔符,得到最后一部分
LastOne = right(Str,len(Str)-InStrRev(Str,splitStr))
End Function
sub seachFile(theFolder)
dim f,f1,st,fd,fd1,t
set f = fso.GetFolder(theFolder)
for each f1 in f.Files
if lcase(right(f1.name,4))=".htm" then
set st = fso.OpenTextFile(f1,1,1)
'全读
if not st.AtEndOfStream then
t=st.readAll
set st = fso.OpenTextFile(getName(f1.name),2,1)
t=HTMLfilter(t)
st.write t
end if
end if
next
set fd = fso.GetFolder(theFolder)
for each fd1 in fd.SubFolders
seachFile fd1
next
end sub
sub gogogo()
seachFile thisFileFolder
alert "处理完毕"
end sub
function getName(x)
dim Arr,a,newName
newName=x
Arr=array("/","\",":","*","?",chr(34),"|","<",">",chr(39))
for each a in Arr
newName=replace(newName,a,"")
next
getName=replace(newName,".htm",".txt")
end function
</script>