16,552
社区成员
发帖
与我相关
我的任务
分享
' Reads "test_html.txt" (decoded with code page 936), finds every record that
' follows the "开学日期" label and collects the parsed records into a list of MyType.
' Each record carries a date, an item name, a period number and a list of scores.
' Throws FormatException if a captured date, period or score cannot be parsed.
Sub Main()
    Dim html As String = File.ReadAllText("test_html.txt", System.Text.Encoding.GetEncoding(936))
    Dim mc As System.Text.RegularExpressions.MatchCollection = System.Text.RegularExpressions.Regex.Matches(html, "(?is)开学日期\D*(?<date>[\d-]+).+?<a[^>]*>\s*(?<p1>[^<]+)<[^>]*>(?<p2>[^<]+)(((?!<li).)*<li[^>]*>(?<num_out>\d+)\D*?</li>)+")
    Dim data As New List(Of MyType)
    For Each m As System.Text.RegularExpressions.Match In mc
        Dim t As New MyType
        ' Parse with the invariant culture so the numeric date is read the same way
        ' regardless of the regional settings of the machine running the program.
        t.Mydate = DateTime.Parse(m.Groups("date").Value, System.Globalization.CultureInfo.InvariantCulture)
        t.Item = m.Groups("p1").Value
        ' The p2 group is "[^<]+", i.e. the whole text node (for example "2010108期"),
        ' so parsing it directly throws FormatException. Parse only the digit run.
        Dim periodDigits As String = System.Text.RegularExpressions.Regex.Match(m.Groups("p2").Value, "[0-9]+").Value
        t.Num = Integer.Parse(periodDigits, System.Globalization.CultureInfo.InvariantCulture)
        ' num_out sits inside a repeated group, so every repetition is kept in Captures.
        For Each c As System.Text.RegularExpressions.Capture In m.Groups("num_out").Captures
            t.score.Add(Integer.Parse(c.Value))
        Next
        data.Add(t)
    Next
    ' "data" now holds the parsed records.
    ' Alternative that prints the raw captures instead of building objects:
    'For Each m As System.Text.RegularExpressions.Match In mc
    '    Console.WriteLine(m.Groups("date").Value) ' date
    '    Console.WriteLine(m.Groups("p1").Value) ' item
    '    Console.WriteLine(m.Groups("p2").Value) ' period number
    '    Dim first As Boolean = True
    '    For Each c As System.Text.RegularExpressions.Capture In m.Groups("num_out").Captures
    '        If Not first Then
    '            Console.Write("、")
    '        Else
    '            first = False
    '        End If
    '        Console.Write(c.Value) ' individual score
    '    Next
    '    Console.WriteLine("--------------神奇的分割线--------------")
    'Next
    ' Keep the console window open until a key is pressed.
    Console.ReadKey()
End Sub
' One parsed record: a date, an item name, a period number and its scores.
Public Class MyType
    ' Date captured by the "date" regex group.
    Public Mydate As DateTime

    ' Item name captured by the "p1" regex group.
    Public Item As String

    ' Period number taken from the "p2" regex group.
    Public Num As Integer

    ' Scores captured by the "num_out" regex group; starts out as an empty list.
    Public score As List(Of Integer) = New List(Of Integer)()
End Class
' Prints every record found in "test_html.txt": date, item and period on their own
' lines, then the scores joined by "、", immediately followed by a separator line.
Sub Main()
    Dim pattern As String = "(?is)开学日期\D*(?<date>[\d-]+).+?<a[^>]*>\s*(?<p1>[^<]+)<[^>]*>(?<p2>[^<]+)(((?!<li).)*<li[^>]*>(?<num_out>\d+)\D*?</li>)+"
    Dim source As String = File.ReadAllText("test_html.txt", System.Text.Encoding.GetEncoding(936))

    For Each record As System.Text.RegularExpressions.Match In System.Text.RegularExpressions.Regex.Matches(source, pattern)
        ' Date, item and period number, one per line.
        Console.WriteLine(record.Groups("date").Value)
        Console.WriteLine(record.Groups("p1").Value)
        Console.WriteLine(record.Groups("p2").Value)

        ' Individual scores: a separator goes before every score except the first.
        Dim scores As System.Text.RegularExpressions.CaptureCollection = record.Groups("num_out").Captures
        For i As Integer = 0 To scores.Count - 1
            If i > 0 Then
                Console.Write("、")
            End If
            Console.Write(scores(i).Value)
        Next

        ' No line break was written after the scores, so this continues the same line.
        Console.WriteLine("--------------神奇的分割线--------------")
    Next

    ' Keep the console window open until a key is pressed.
    Console.ReadKey()
End Sub
2010-09-16
羽毛球
2010108期
02、04、06--------------神奇的分割线--------------
2010-10-16
篮球
2010108期
01、03、05--------------神奇的分割线--------------
' Prints only the first record found in "test_html.txt": date, item and period on
' their own lines, then the scores joined by "、" with no trailing line break.
Sub Main()
    Dim source As String = File.ReadAllText("test_html.txt", System.Text.Encoding.GetEncoding(936))
    Dim record As System.Text.RegularExpressions.Match = System.Text.RegularExpressions.Regex.Match(source, "(?is)开学日期\D*(?<date>[\d-]+).+?<a[^>]*>\s*(?<p1>[^<]+)<[^>]*>(?<p2>[^<]+)(((?!<li).)*<li[^>]*>(?<num_out>\d+)\D*?</li>)+")

    ' Header fields are printed in a fixed order: date, item, period.
    For Each groupName As String In New String() {"date", "p1", "p2"}
        Console.WriteLine(record.Groups(groupName).Value)
    Next

    ' Assemble the score line first, then write it in one go.
    Dim scoreLine As New System.Text.StringBuilder()
    For Each score As System.Text.RegularExpressions.Capture In record.Groups("num_out").Captures
        If scoreLine.Length > 0 Then
            scoreLine.Append("、")
        End If
        scoreLine.Append(score.Value)
    Next
    Console.Write(scoreLine.ToString())

    ' Keep the console window open until a key is pressed.
    Console.ReadKey()
End Sub
2010-09-16
羽毛球
2010108期
02、04、06