28,391
社区成员
发帖
与我相关
我的任务
分享
<%
' Send output to the client as it is written instead of buffering the whole page.
Response.Buffer = False
' Abort the script after 1 second.
' NOTE(review): presumably a safety net in case the tag-matching loops in this page never terminate - confirm.
Server.ScriptTimeout = 1
' Extracts one complete HTML element, including nested elements of the same tag name.
'
' Parameters:
'   str  - the markup to search.
'   num  - 1-based position to start searching from; 0 means "from the beginning".
'   html - tag name without brackets, e.g. "div".
'   key  - exact text that follows the tag name inside the opening tag, including
'          its leading space (e.g. " class=""title"""), or "" for a bare tag.
'
' Returns a two-element array:
'   (0) the element text from its opening tag through its matching closing tag,
'       or "" when no complete element was found;
'   (1) the position just after the returned element, or Len(str) when nothing
'       was found, so callers can feed it back in as num.
'
' Matching is plain, case-sensitive substring search (no regular expressions).
' Nested openers are detected by the prefix "<" & html, so a tag such as
' <div .../> is counted as an opener that needs its own closing tag.
' If the markup has more openers than closers, the result runs up to the last
' closing tag that could be found.
Function getVal(ByRef str, ByRef num, ByRef html, ByRef key)
    Dim si, ei, oi, tstart, tend, floor, length, openPrefix, closeTag
    length = Len(str)
    openPrefix = "<" & html
    closeTag = "</" & html & ">"
    ' InStr requires a start position of at least 1, so 0 is mapped to the
    ' two-argument form that searches from the start of the string.
    If num = 0 Then
        tstart = InStr(str, openPrefix & key & ">")
    Else
        tstart = InStr(num, str, openPrefix & key & ">")
    End If
    If tstart > 0 Then
        ' ei: position of the closing tag currently examined.
        ' oi: position of the last closing tag actually found.
        ' floor: number of elements still open (nesting depth).
        ei = tstart : oi = ei : floor = 1
        While floor > 0
            ei = InStr(ei + 1, str, closeTag) ' next closing tag
            If ei > 0 Then
                floor = floor - 1 ' this closing tag ends one level
                ' Every opener between the previous boundary and this closing
                ' tag starts a nested element and adds one level.
                si = InStr(oi + 1, str, openPrefix)
                While si > 0 And si < ei
                    floor = floor + 1
                    si = InStr(si + 1, str, openPrefix)
                Wend
                oi = ei ' remember it, because ei becomes 0 once the closers run out
            Else
                ' No closing tag left: the markup is unbalanced, stop scanning.
                floor = 0
            End If
        Wend
        If oi = tstart Then
            ' Not a single closing tag follows the opening tag. Report "not found"
            ' instead of returning a fragment of the opening tag.
            getVal = Array("", length)
        Else
            tend = oi - tstart + Len(closeTag)
            getVal = Array(Mid(str, tstart, tend), oi + Len(closeTag))
        End If
    Else
        getVal = Array("", length)
    End If
End Function
' Demo driver: builds a sample HTML fragment and prints, HTML-encoded, the
' elements that getVal extracts from it, separated by <hr />.
Dim str, a, i
str = "<body><div class=""sdfsdfsaf"">sdfasdfsadf" _
& "<div class=""column762 Firstbg"">" _
& "<div class=""title"">" _
& "<h2>180ETF(510180) </h2>" _
& "<h2>180ETF(510181) </h2>" _
& "<span>2007-11-16 </span>" _
& "<span class=""tg14"">11.2910 </span><img width=""16"" height=""20"" alt="""" src=""/images/down.gif""/><span class=""tg12"">-1.4231% </span></div>" _
& "<div class=""clear""/>" _
& "</div>" _
& "</div></div>" _
& "<div>sdfasdfsadf" _
& "</div><div></div></body>"
With Response
    ' NOTE(review): the first lookup is printed twice; kept as in the original - confirm it is intentional.
    .Write Server.HTMLEncode(getVal(str, 0, "div", " class=""sdfsdfsaf""")(0))
    .Write "<hr />"
    .Write Server.HTMLEncode(getVal(str, 0, "div", " class=""sdfsdfsaf""")(0))
    .Write "<hr />"
    .Write Server.HTMLEncode(getVal(str, 0, "div", " class=""title""")(0))
    .Write "<hr />"
    .Write Server.HTMLEncode(getVal(str, 0, "div", " class=""column762 Firstbg""")(0))
    .Write "<hr />"
    .Write Server.HTMLEncode(getVal(str, 0, "body", "")(0))
    .Write "<hr />"
    .Write Server.HTMLEncode(getVal(str, 0, "h2", "")(0))
    .Write "<hr />"
    .Write Server.HTMLEncode(getVal(str, 0, "span", " class=""tg14""")(0))
    .Write "<hr />"
    ' Print every <h2> element in turn. The loop stops on the empty fragment that
    ' getVal returns when nothing more is found. Comparing the returned position
    ' with Len(str) would wrongly skip an element that ends at the very end of
    ' the string, because that position is not below Len(str) either.
    a = getVal(str, 0, "h2", "")
    While a(0) <> ""
        .Write Server.HTMLEncode(a(0)) & "<br />"
        i = a(1) ' resume the search just after the element that was printed
        a = getVal(str, i, "h2", "")
    Wend
End With
%>
' Regex alternative: lazily match from the opening tag to the first </div> that
' is followed by a character other than a newline. The closing tag is written
' as </div>; the previous "<\\div>" matched the literal text "<\div>" and could
' never match HTML. A lazy match does not balance nested <div> elements.
pat="<div class=""column762 Firstbg"">[\s\S]+?</div>[^\n]"
String buf = new String();
read(buf); // 把要提取的字符串放进buf中
String key = "column762"
int iKey = buf.find(key); // 寻找column762的位置,有的是用indexOf() 函数
int retStart = buf.find('>',iKey)+1; //返回字符串的起始位置
int countDivInside = 0; // 内部<div></div>的个数
int iCurrent = iKey;
while(countDivInside >= 0)
{
int iDivStart = buf.find("<div",iCurrent);
int iDivEnd = buf.find("</div",iCurrent);
if(iDivStart < iDivEnd) //出现一次<div>, countDivInside++
{
countDivInside++;
iCurrent = iDivStart+4;
}
else
{
countDivInside--; //出现一次</div>, countDivInside--
iCurrent = iDivEnd+4;
}
}
int retEnd = iCurrent; //返回字符串的中止位置
return buf.substr(retStart,retEnd);