贴个"批量获取网页里的链接地址"
<%
' SqlDataBase = "../database/zz.mdb" '数据库路径
' SqlProvider = "Microsoft.Jet.OLEDB.4.0" '驱动程序[ Microsoft.Jet.OLEDB.4.0 Microsoft.ACE.OLEDB.12.0 ]
' Connstr="Provider="&SqlProvider&";Data Source="&Server.MapPath(SqlDataBase)
'Set Conn=Server.CreateObject("ADODB.Connection")
'Conn.open ConnStr
'Set Rs = Server.CreateObject("ADODB.Recordset")
'================================================
'函数名:GetHTTPPage
'作 用:获取HTTP页
'参 数:url ----远程URL
'返回值:远程HTML代码
'这段是别人写的,
'================================================
Public Function GetRemoteData(ByVal URL, ByVal Cset)
If Len(Cset) < 2 Then Cset = "GB2312"
Dim strHeader
Dim l
On Error Resume Next
Dim Retrieval
Dim ObjStream
Set ObjStream = CreateObject("ADODB.Stream")
ObjStream.Type = 1
ObjStream.Mode = 3
ObjStream.Open
Set Retrieval = CreateObject("MSXML2.XMLHTTP")
With Retrieval
.Open "GET", URL, False
.setRequestHeader "Referer", URL
.send
If .readyState <> 4 Then Exit Function
If .Status > 300 Then Exit Function
'--获取目标网站文件头
strHeader = .getResponseHeader("Content-Type")
strHeader = UCase(strHeader)
ObjStream.Write (.responseBody)
End With
Set Retrieval = Nothing
If Len(strHeader) > 0 Then
'--获取目标文件编码
l = InStrRev(strHeader, "CHARSET=", -1, 1)
If l > 0 Then
Cset = Right(strHeader, Len(strHeader) - l - 7)
Else
Cset = Cset
End If
End If
ObjStream.Position = 0
ObjStream.Type = 2
ObjStream.Charset = Trim(Cset)
GetRemoteData = ObjStream.ReadText
ObjStream.Close
Set ObjStream = Nothing
Exit Function
End Function
'================================================
'函数名:CutFixed
'作 用:截取固定的字符串
'参 数:strHTML ----原字符串
' start ------ 开始字符串
' last ------ 结束字符串
'================================================
Public Function CutFixed(ByVal strHTML, ByVal start, ByVal last)
Dim s
Dim Match
Dim strPattern
Dim strContent
Dim t, l
t = Len(start): l = Len(last)
If t = 0 Or l = 0 Then Exit Function
strPattern = "(" & CorrectPattern(start) & ")(.+?)(" & CorrectPattern(last) & ")"
On Error Resume Next
Dim re
Set re = New RegExp
re.IgnoreCase = False
re.Global = False
re.Pattern = strPattern
Set s = re.Execute(strHTML)
For Each Match In s
strContent = Match.Value
Next
Set s = Nothing
Set re = Nothing
CutFixed = Mid(strContent, t + 1, Len(strContent) - l - t)
Exit Function
End Function
Private Function CorrectPattern(ByVal str)
str = Replace(str, "\", "\\")
str = Replace(str, "~", "\~")
str = Replace(str, "!", "\!")
str = Replace(str, "@", "\@")
str = Replace(str, "#", "\#")
str = Replace(str, "%", "\%")
str = Replace(str, "^", "\^")
str = Replace(str, "&", "\&")
str = Replace(str, "*", "\*")
str = Replace(str, "(", "\(")
str = Replace(str, ")", "\)")
str = Replace(str, "-", "\-")
str = Replace(str, "+", "\+")
str = Replace(str, "[", "\[")
str = Replace(str, "]", "\]")
str = Replace(str, "<", "\<")
str = Replace(str, ">", "\>")
str = Replace(str, ".", "\.")
str = Replace(str, "/", "\/")
str = Replace(str, "?", "\?")
str = Replace(str, "=", "\=")
str = Replace(str, "|", "\|")
str = Replace(str, "$", "\$")
CorrectPattern = str
End Function
'写入数据开始
Sub vnumstr(valplugin)
sql="Select * from zz where add_url='"&valplugin&"'"
Set Rs = Server.CreateObject("ADODB.Recordset")
Rs.Open sql, Conn, 1, 3
If rs.bof or rs.eof Then
Rs.AddNew
rs("add_url")=valplugin
rs("add_createtime1")=Now()
rs.update
End if
rs.close
Set rs=Nothing
End Sub
'写入数据结束
Set re = New RegExp
gets=trim(request("gets"))
homes=split(gets,"/")
homesn=ubound(homes)
'response.write homesn
for i=0 to homesn-1
homesn=homes(2)&"/"&homes(i)
Next
strContent = LCase(replace(GetRemoteData(gets, "GB2312"),"""",""))
sfs=split(strContent,"<a")
sfsa=ubound(sfs)
for sfsaa=0 to sfsa
re.Pattern ="(<a )"
strContent = re.Replace(sfs(sfsaa), "qian")
re.Pattern ="(\/a>)"
strContent = re.Replace(strContent, "hou")
re.Pattern ="( target=)"
strContent = re.Replace(strContent, "")
strContent = Replace(Replace(Replace(Replace(Replace(Replace(strContent, "_blank",""),"'",""),"_self",""),"_parent",""),"_top",""),"blank","")
'response.write strContent
valplugin = CutFixed(strContent, "qian", "hou")
valplugin = CutFixed(strContent, "href=", ">")
valpluginN=InStr(valplugin," ")
If valpluginN>0 then
valpluginvv=Split(valplugin," ")
valplugin=valpluginvv(0)
End If
valpluginJ=InStr(valplugin,"javascript:")
If valpluginJ>0 then
valpluginv=Split(valplugin,"javascript:")
valplugin=valpluginv(0)
End if
if left(valplugin,4)<>"http"then
if left(valplugin,1)="/" then
valplugin="http://"&homes(2)&valplugin
elseif left(valplugin,3)="../" then
valplugin="http://"&homes(2)&replace(valplugin,"../","/")
else
valplugin="http://"&homes(2)&"/"&valplugin
end If
Else
valplugin=valplugin
end if
'Call vnumstr(valplugin)
response.write valplugin&"<BR>"
Next
%>
写得不好,还会出现好多问题,欢迎测试后修正,并把OK的也贴上来,给我一份,谢谢