请问如何用vb提取网页中表格内容?

wolfs 2007-06-27 01:29:50
这个比较有挑战性,因为网页中可能表格没有id属性,并且网页不一样,不能用
利用文字截取
<html>
<table width="200" border="0" cellpadding="0" cellspacing="0" id="xyf">
<tr>
<td >姓名</td>
<td>学历</td>
<td>籍贯</td>
</tr>
<tr>
<td id=xiaoxing1>小红</td>
<td id=xiaoxing2>研究生</td>
<td id=xiaoxing3>北京</td>
</tr>
<tr>
<td id=xiaofeng1>小奉</td>
<td id=xiaofeng2>本科</td>
<td id=xiaofeng3>上海</td>
</tr>
</table>
</html>
如何提取类似网页的内容:
姓名 学历 籍贯
小红 研究生 北京
小奉 本科 上海
当然网页很多,并非都如上面的2行3列。
...全文
843 13 打赏 收藏 转发到动态 举报
写回复
用AI写文章
13 条回复
切换为时间正序
请发表友善的回复…
发表回复
eamean 2008-07-19
  • 打赏
  • 举报
回复
里面那个asTable是什么东西啊?
kmlxk0 2007-06-29
  • 打赏
  • 举报
回复
'两textbox,text1是html代码,text2是输出
'代码如下

Option Explicit

Private Sub Command1_Click()
Dim asTable() As String
Dim lRow As Long
Dim lColumn As Long
Dim i As Long
Dim j As Long
Call ReadHtmlTable(Text1.Text, asTable, lRow, lColumn)
For i = 0 To lRow
For j = 1 To lColumn
Text2.SelStart = 65535
Text2.SelText = asTable(i * lColumn + j - 1) & vbTab
'Debug.Print "asTable("; i * lColumn + j - 1; ")"
Next
Text2.SelStart = 65535
Text2.SelText = vbCrLf
Next
End Sub

Private Sub ReadHtmlTable(ByRef sHtml As String, _
ByRef asTable() As String, _
ByRef lRow As Long, _
ByRef lColumn As Long)


Dim lTablePos As Long
Dim lEndTablePos As Long
Dim lTRPos1 As Long
Dim lTRPos2 As Long
Dim lEndTRPos As Long
Dim sTDContent As String
Dim asTD() As String
Dim lCount As Long
Dim bContinue As Boolean
Dim i As Long

lTablePos = InStr(1, sHtml, "<table", vbTextCompare)
If lTablePos <= 0 Then Exit Sub
lTablePos = InStr(lTablePos, sHtml, ">", vbTextCompare)
lEndTablePos = InStr(lTablePos, sHtml, "</table>", vbTextCompare)
lTRPos1 = InStr(lTablePos, sHtml, "<tr", vbTextCompare)
lTRPos2 = InStr(lTRPos1 + 1, sHtml, ">", vbTextCompare)
lEndTRPos = InStr(lTablePos, sHtml, "</tr>", vbTextCompare)
If lTRPos1 <= 0 Then Exit Sub
bContinue = True
While bContinue
bContinue = True
If lTRPos1 < lTablePos Or lTRPos1 > lEndTablePos Then bContinue = False
If lEndTRPos < lTablePos Or lEndTRPos > lEndTablePos Then bContinue = False
If bContinue Then
sTDContent = Mid(sHtml, lTRPos2 + 1, lEndTRPos - lTRPos2 - 1)
Call ReadHtmlTD(sTDContent, asTD, lCount)
lColumn = lCount + 1
For i = 0 To lCount
ReDim Preserve asTable(lRow * lColumn + lColumn)
asTable(lRow * lColumn + i) = asTD(i)
Debug.Print "asTable("; lRow * lColumn + i; ") = asTD("; i; ")"
'Debug.Print asTD(i),
Next
lRow = lRow + 1
Debug.Print
lTRPos1 = InStr(lTRPos1 + 1, sHtml, "<tr", vbTextCompare)
lTRPos2 = InStr(lTRPos1 + 1, sHtml, ">", vbTextCompare)
lEndTRPos = InStr(lTRPos2 + 2, sHtml, "</tr>", vbTextCompare)
End If
Wend
lRow = lRow - 1
End Sub

Private Sub ReadHtmlTD(ByRef sHtml As String, _
ByRef asTD() As String, _
ByRef lCount As Long)
Dim lTDPos1 As Long
Dim lTDPos2 As Long
Dim lEndTDPos As Long
Dim lLen As Long
lLen = Len(sHtml)

Dim bContinue As Boolean

lCount = 0
lTDPos1 = InStr(1, sHtml, "<td", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
If lTDPos1 <= 0 Then Exit Sub
bContinue = True
While (bContinue)
bContinue = True
If lTDPos1 <= 0 Or lTDPos1 > lLen Then bContinue = False
If lEndTDPos <= 0 Or lEndTDPos > lLen Then bContinue = False
'Debug.Print Mid(sHtml, lTDPos2 + 1, lEndTDPos - lTDPos2 - 1)
If bContinue Then
lCount = lCount + 1
lTDPos1 = InStr(lTDPos1 + 1, sHtml, "<td", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
End If
Wend

ReDim asTD(lCount + 1) As String
lCount = 0
bContinue = True
lTDPos1 = InStr(1, sHtml, "<td", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
While (bContinue)
bContinue = True
If lTDPos1 <= 0 Or lTDPos1 > lLen Then bContinue = False
If lEndTDPos <= 0 Or lEndTDPos > lLen Then bContinue = False
asTD(lCount) = Mid(sHtml, lTDPos2 + 1, lEndTDPos - lTDPos2 - 1)
If bContinue Then
lCount = lCount + 1
lTDPos1 = InStr(lTDPos1 + 1, sHtml, "<td", vbTextCompare)
lTDPos2 = InStr(lTDPos1 + 1, sHtml, ">", vbTextCompare)
lEndTDPos = InStr(lTDPos1 + 1, sHtml, "</td>", vbTextCompare)
End If
Wend
lCount = lCount - 1
End Sub



zzyong00 2007-06-29
  • 打赏
  • 举报
回复
像xghim((︵-︵))哪样自己分析处理也不错啊
of123 2007-06-29
  • 打赏
  • 举报
回复
怎么叫微软没解决?没解决的话,浏览器是如何显示表格的?
cangwu_lee 2007-06-28
  • 打赏
  • 举报
回复
诸位,也可以用规则表达式来取。
guojl 2007-06-28
  • 打赏
  • 举报
回复
我也想知道楼主的问题答案
wolfs 2007-06-28
  • 打赏
  • 举报
回复
看来也只有如此了,郁闷
xghim 2007-06-28
  • 打赏
  • 举报
回复
用分析文本截取字符.
先用instr寻找"<table" 然后寻找<td> 和</td>,取的字符长度,把两个之间的字符取出来.你自己加入空格都可以,再找</tr>,在此长度前的文本都作为一行,之后的换行.


我这方法是笨了点,但也是可以的.
wolfs 2007-06-28
  • 打赏
  • 举报
回复
提取的文件类似网页的内容:
姓名 学历 籍贯
小红 研究生 北京
小奉 本科 上海

但是结果却没空格间隔:
姓名学历籍贯
小红研究生北京
小奉本科上海
怎么办?
wolfs 2007-06-28
  • 打赏
  • 举报
回复
微软为什么这个问题也没解决
guyehanxinlei 2007-06-27
  • 打赏
  • 举报
回复
关注
wolfs 2007-06-27
  • 打赏
  • 举报
回复
关键是 Left(Table1.innerText, 2) = "字段" 中这个“字段”未知,可能是“姓名”也可能“产品名称”还可能是“股票代码”。。。
hongqi162 2007-06-27
  • 打赏
  • 举报
回复
Dim Tables As IHTMLElementCollection
Set Tables = WebBrowser2.document.getElementsByTagName("TABLE") '循环出TagName为TABLE的表格元素
Dim Table1 As HTMLTable
For Each Table1 In Tables
If Left(Table1.innerText, 2) = "字段" Then '找出字段中头两个字符是"字段"的表格
Dim Row As HTMLTableRow, Cell As HTMLTableCell
For i = 1 To Table1.rows.length - 1 ' 逐行处理
Set Row = Table1.rows(i)
Row.cells(j).innerText '读取指定列的内容
Next
End If
Next

7,763

社区成员

发帖
与我相关
我的任务
社区描述
VB 基础类
社区管理员
  • VB基础类社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧