vb中inte获取网页内的所有超链接

tongfengzi 2009-12-26 02:44:31
vb中inte获取网页内的所有超链接 怎么获取
包括正则转化
不要用webbrower
我想它实现以下功能
输入网站 自动检索所有链接,并输出所有超链接
判断错误网页 生成地图

以下下载的源码 不是很懂 希望高手解释下
新手 不在乎分 下面代码有错误
Private Sub cmdExit_Click()
Unload Me
End Sub

Private Sub cmdGoWander_Click()
StopSearching = False
rtbLinkNames.Text = ""
On Error Resume Next
Close #1
On Error GoTo 0
' Open CurDir & "\HTTPFILE.TXT" For Append As #1
Current_Pos = 1
Me.MousePointer = vbArrowHourglass
cmdGoWander.Enabled = False
cmdStopWandering.Enabled = True
If Get_File(txtSite.Text) Then
If Not Parse Then
Me.MousePointer = vbNormal
cmdGoWander.Enabled = True
cmdStopWandering.Enabled = False
Exit Sub
End If
Else
Me.MousePointer = vbNormal
MsgBox "Unable to find the desired site."
End If
cmdGoWander.Enabled = True
cmdStopWandering.Enabled = False
Me.MousePointer = vbNormal
StopSearching = True
End Sub

Private Sub cmdStopWandering_Click()
StopSearching = True
End Sub

Private Sub Form_Load()
txtSite.Text = "http://www.jamsa.com/jamsa1.htm"
NewLine = Chr(13) & Chr(10)
StopSearching = True
cmdStopWandering.Enabled = False
End Sub

Private Sub Form_QueryUnload(Cancel As Integer, UnloadMode As Integer)
Dim Response As Integer

If Not StopSearching Then
Response = MsgBox("Stop search and lose updated file?", vbYesNo)
If Response = vbNo Then
Cancel = 1
Exit Sub
End If
Else
Response = MsgBox("Close program?", vbYesNo)
If Response = vbNo Then
Cancel = 1
Exit Sub
End If
End If
itcWander.Cancel
End Sub

Private Sub Form_Unload(Cancel As Integer)
Unload frmSearching
Unload frmParsing
End Sub

Private Sub itcWander_StateChanged(ByVal State As Integer)
Dim strMess As String ' Message variable.

DoEvents
Select Case State
Case 1 'Resolving host
Connected = False

Case icConnected
Connected = True

Case State = icError
strMess = "ErrorCode: " & itcWander.ResponseCode & " : " & itcSearch.ResponseInfo
MsgBox strMess, vbOKOnly
itcWanderSearchError = True
End Select
End Sub


Option Explicit
Option Compare Text

Const TAG_LENGTH% = 1000
Const OUT_FILE = "\taglist.txt"
Public Current_Pos As Long
Public Tag As String
Public Real_File_Name As String
Public File_Name As String
Public Site As String
Public Location As String
Public Site_Length As Integer
Public NewLine As String
Public SiteContents As String
Public inetSearchError As Boolean, StopSearching As Boolean

Public Function TrimPage(ByVal Address As String) As String
Do While Right$(Address, 1) <> "/"
Address = Left$(Address, Len(Address) - 1)
Loop
TrimPage = Address
End Function
Private Function ResolvedSite(FileAddr As String, Parent As String, NewTag As String) As Boolean
'On Error GoTo ResolveError
ResolvedSite = True
Parent = FileAddr
If Right$(Parent, 1) <> "/" Then
Parent = TrimPage(Parent)
End If
If Left$(NewTag, 3) <> "../" And Left$(NewTag, 5) <> "http:" Then
Exit Function
End If
If Left$(NewTag, 6) = "http:/" And Left$(NewTag, 7) <> "http://" Then
NewTag = Right$(NewTag, Len(NewTag) - 6)
End If
Do While Left$(NewTag, 3) = "../"
NewTag = Right$(NewTag, Len(NewTag) - 3)
Parent = Left(Parent, Len(Parent) - 1)
Do While Right$(Parent, 1) <> "/"
Parent = Left$(Parent, Len(Parent) - 1)
Loop
Loop
Exit Function

ResolveError:
ResolvedSite = False
MsgBox "Unable to resolve parent site!"
End Function
Public Function Get_File(ByVal txtURL As String) As Boolean
frmSearching.Hide
frmSearching.lblSite.Caption = txtURL
If Len(txtURL) > 40 Then
frmSearching.lblSite.Width = Len(txtURL) * 73
frmSearching.lblCaption.Width = frmSearching.lblSite.Width
frmSearching.Width = frmSearching.lblCaption.Width + 435
End If
frmSearching.Show
DoEvents
Real_File_Name = txtURL
Site = Real_File_Name
Site_Length = Len(Site)
inetSearchError = False
frmWanderer.itcWander.RequestTimeout = 60
frmWanderer.itcWander.AccessType = icUseDefault
On Error Resume Next
SiteContents = frmWanderer.itcWander.OpenURL(txtURL, icString)
Unload frmSearching
DoEvents
If Err.Number <> 0 And Not inetSearchError Then
Get_File = False
Exit Function
End If
Get_File = True
End Function
Public Sub AddLink(LinktoAdd As String)
Dim FoundPos As Integer

FoundPos = 0
FoundPos = frmWanderer.rtbLinkNames.Find(LinktoAdd, FoundPos)
If FoundPos <> -1 Then 'the phrase was found.
Exit Sub
Else
frmWanderer.rtbLinkNames.Text = frmWanderer.rtbLinkNames.Text & LinktoAdd & NewLine
End If
End Sub
Public Function Parse() As Boolean
Dim PositionInString As Long, Response As Integer, ThisLinkLength As Integer
Dim End_Of_List As Boolean, NewFileName As String, GotFile As Boolean, Parent As String
Dim Done As Boolean, Tag As String, lclTag As String, AddToFileString As String, RelativeAddress As Boolean
Dim lclTag_Length As Integer, I As Integer, FirstQuote As Integer, SecondQuote As Integer

End_Of_List = False
PositionInString = 0
Done = False
If Not Initialize_OutputFile() Then Exit Function
Do While Not End_Of_List And Not StopSearching
Current_Pos = 1
Done = Get_Tag(Tag)
Do While Not Done And Not StopSearching
frmParsing.Show
DoEvents
lclTag = Tag
lclTag_Length = Len(Tag)
FirstQuote = 0
SecondQuote = 0
If InStr(lclTag, "href") Then
Do While Left$(lclTag, 4) <> "href"
lclTag = Right$(lclTag, Len(lclTag) - 1)
Loop
If Not InStr(lclTag, "::") Then
RelativeAddress = True
Else
RelativeAddress = False
End If
For I = 1 To lclTag_Length
If Mid$(lclTag, I, 1) = Chr(34) Then
If FirstQuote <> 0 Then
SecondQuote = I
Exit For
Else
FirstQuote = I + 1
End If
End If
Next
AddToFileString = Mid$(lclTag, FirstQuote, SecondQuote - FirstQuote)
If InStr(AddToFileString, "://") Then
AddLink (AddToFileString)
Else
If Not ResolvedSite(Site, Parent, AddToFileString) Then
frmParsing.Hide
MsgBox "Unable to resolve site!"
Else
AddLink (Parent & AddToFileString)
End If
End If
End If
Done = Get_Tag(Tag)
DoEvents
Loop
frmParsing.Hide
If Done Then
If Len(frmWanderer.rtbLinkNames.Text) > 0 Then frmWanderer.rtbLinkNames.SaveFile App.Path & OUT_FILE, rtfText
GotFile = False
Else
Response = MsgBox("Are you sure you want to stop search?", vbYesNo)
If Response = vbYes Then
frmWanderer.rtbLinkNames.SaveFile App.Path & OUT_FILE, rtfText
frmWanderer.itcWander.Cancel
Parse = Not StopSearching
Exit Function
End If
End If
DoEvents
Do Until GotFile Or StopSearching
If PositionInString < Len(frmWanderer.rtbLinkNames.Text) Then
ThisLinkLength = 0
If PositionInString = 0 Then PositionInString = 1
Do While Mid$(frmWanderer.rtbLinkNames.Text, PositionInString + ThisLinkLength, 1) <> Chr(10)
ThisLinkLength = ThisLinkLength + 1
DoEvents
Loop
NewFileName = Mid$(frmWanderer.rtbLinkNames.Text, PositionInString, ThisLinkLength - 1)
If Left$(NewFileName, 6) <> "mailto" Then
PositionInString = PositionInString + ThisLinkLength + 1
ThisLinkLength = 0
If Not Get_File(NewFileName) Then
MsgBox "Error opening page. Moving on to next page. Bad page = " & NewFileName
GotFile = False
Else
GotFile = True
End If
Else
GotFile = False
End If
Else
GotFile = True
End_Of_List = True
End If
DoEvents
Loop
' frmWanderer.rtbLinkNames.Text = AddToFileString
Loop
Parse = Not StopSearching
End Function
Public Function Get_Tag(ReturnTag As String) As Boolean
ReturnTag = ""
Get_Tag = False

Do While Current_Pos < Len(SiteContents)
If Mid(SiteContents, Current_Pos, 1) = "<" And Mid(SiteContents, Current_Pos + 1, 1) = "A" Then
Dim Local_I As Integer

Local_I = 1
Do While Mid(SiteContents, Current_Pos + Local_I, 1) <> ">"
If Local_I < TAG_LENGTH Then
ReturnTag = ReturnTag & Mid(SiteContents, Current_Pos + Local_I, 1)
End If
Local_I = Local_I + 1
Loop
Current_Pos = Current_Pos + Local_I
Exit Function
End If
Current_Pos = Current_Pos + 1
Loop
Get_Tag = True
End Function
Public Function Initialize_OutputFile() As Boolean
If Dir(App.Path & OUT_FILE) <> "" Then
On Error Resume Next
Kill App.Path & OUT_FILE
If Err.Number <> 0 Then
MsgBox "Unable to open output file.", vbCritical
Initialize_OutputFile = False
Exit Function
End If
End If
Open App.Path & OUT_FILE For Append As #1
Close #1
Initialize_OutputFile = True
Exit Function
End Function
...全文
492 11 打赏 收藏 转发到动态 举报
写回复
用AI写文章
11 条回复
切换为时间正序
请发表友善的回复…
发表回复
tongfengzi 2009-12-28
  • 打赏
  • 举报
回复
[Quote=引用 10 楼 tongfengzi 的回复:]
引用 5 楼 syssz 的回复:
VB code'引用了Microsoft VBScript Regular Expressions 5.5

就这个引用我找不到,vista ie7 绿色vb6.0
[/Quote]

找到了 以解决,经测试可用~就是不懂的怎么使用~我在自己研究把`
谢谢SYSSZ(老张)的详细的回答 同时谢谢 xxyj6450(三断笛) asftrhgjhkjlkttttttt(孤独剑) jhone99(jhone) 准备结贴 送分 第一次提问 ,希望能顺利送出~~~
tongfengzi 2009-12-28
  • 打赏
  • 举报
回复
[Quote=引用 5 楼 syssz 的回复:]
VB code'引用了Microsoft VBScript Regular Expressions 5.5[/Quote]

就这个引用我找不到,vista ie7 绿色vb6.0
tongfengzi 2009-12-28
  • 打赏
  • 举报
回复
[Quote=引用 8 楼 syssz 的回复:]
引用 7 楼 tongfengzi 的回复:
引用 6 楼 xxyj6450 的回复:
可以用正则分析源代码,也可以直接用DOM遍历.
  能详细点吗~~~  谢谢

五楼代码你试了吗?就是用正则分析源代码获取网页内的所有超链接的例子.
[/Quote]
我试试 行的话结贴~ 其实 我是想知道 inet 的 一些用法~ 已找到文章 就差研究了
SYSSZ 2009-12-28
  • 打赏
  • 举报
回复
[Quote=引用 7 楼 tongfengzi 的回复:]
引用 6 楼 xxyj6450 的回复:
可以用正则分析源代码,也可以直接用DOM遍历.
  能详细点吗~~~  谢谢
[/Quote]
五楼代码你试了吗?就是用正则分析源代码获取网页内的所有超链接的例子.
tongfengzi 2009-12-28
  • 打赏
  • 举报
回复
[Quote=引用 6 楼 xxyj6450 的回复:]
可以用正则分析源代码,也可以直接用DOM遍历.
[/Quote] 能详细点吗~~~ 谢谢
三断笛 2009-12-27
  • 打赏
  • 举报
回复
可以用正则分析源代码,也可以直接用DOM遍历.
SYSSZ 2009-12-27
  • 打赏
  • 举报
回复
'引用了Microsoft VBScript Regular Expressions 5.5
Private Sub Command1_Click()
Dim re As RegExp
Dim mh As Match
Dim mhs As MatchCollection
Text1.Text = ""
Source1 = Inet1.OpenURL("www.csdn.net")
If Source1 <> "" Then
Text1.Text = Source1
Me.Inet1.Cancel
End If
Set re = New RegExp
re.Global = True
re.Pattern = "http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?"
Set mhs = re.Execute(Source1)
For Each mh In mhs
Debug.Print mh
Next
End Sub
tongfengzi 2009-12-27
  • 打赏
  • 举报
回复
[Quote=引用 3 楼 asftrhgjhkjlkttttttt 的回复:]

分析源代码,用正则[/Quote] 我不会使用 inet控件 ~~ 望教教~~谢谢~
孤独剑_LPZ 2009-12-26
  • 打赏
  • 举报
回复
东拼西凑的代码,这玩意完成到哪一步了?
分析源代码,用正则,最笨的办法是用instr搜索有没有http标记的
jhone99 2009-12-26
  • 打赏
  • 举报
回复
什么错误?哪个位置?说明了大家才好帮你
tongfengzi 2009-12-26
  • 打赏
  • 举报
回复
没回答的?

1,502

社区成员

发帖
与我相关
我的任务
社区描述
VB 网络编程
社区管理员
  • 网络编程
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧