62,046
社区成员
发帖
与我相关
我的任务
分享
' Reads a saved HTML file line by line and, for every line that contains a
' <td> cell whose class attribute includes "td" followed by a digit, shows the
' first piece of text found between a ">" and the next "<" on that line.
Dim mystr As String   ' the current line being examined
Dim pattern As String ' regular expression that decides whether a line is of interest
pattern = "<td .+?class=.+?td[0-9]+?.+?>.+?</td>"
'pattern = "<tb .+?class=.+?tb[0-9]+?.+?>.+?</tb>"

' Using guarantees the reader (and the underlying file handle) is released
' even when a regex call or MsgBox throws part-way through the file.
Using myreader As StreamReader = File.OpenText("C:\Documents and Settings\USER\Desktop\Public\新增文字文件.txt")
    mystr = myreader.ReadLine() ' ReadLine returns Nothing at end of file
    While mystr IsNot Nothing
        If Regex.IsMatch(mystr, pattern) Then
            ' Take the first ">...<" span on the line, then drop the two
            ' delimiters so that only the enclosed text remains.
            Dim sonstr As String = Regex.Match(mystr, ">.+?<").Value
            sonstr = sonstr.Replace(">", "").Replace("<", "")
            MsgBox(sonstr)
        End If
        mystr = myreader.ReadLine() ' advance to the next line
    End While
End Using
// Original pair of expressions:
//   regTD matches the inner content of a <td> element, skipping cells whose
//         content is only whitespace (the "(?!\s*</td)" look-ahead);
//   regRe matches HTML tags as well as runs of whitespace.
Regex regTD = new Regex(@"(?is)(?<=<td[^>]*>)(?!\s*</td)(?:(?!</td\b).)*(?=</td>)", RegexOptions.Compiled);
Regex regRe = new Regex(@"<[^>]*>|\s+", RegexOptions.Compiled);
// Replace the two declarations above with the following two:
Regex regTD = new Regex(@"(?is)(?<=<td[^>]*>)(?:(?!</td\b).)*(?=</td>)", RegexOptions.Compiled); // cells that end up empty once their HTML tags are removed are no longer filtered out
Regex regRe = new Regex(@"<[^>]*>", RegexOptions.Compiled); // removes HTML tags only; whitespace is not filtered
/// <summary>
/// Truncates a string to a maximum display width while leaving embedded HTML
/// markup intact, appends "..." and then closes every tag that the cut left open.
/// </summary>
/// <remarks>
/// Characters inside a tag are copied but never counted. A character entity
/// such as &amp;nbsp; counts as one unit. Any other character counts as one
/// unit, or two when the system default encoding needs more than one byte for it.
/// The "..." suffix is always appended, even when nothing had to be cut.
/// Copying stops only after a counted character, so at least one counted
/// character is kept when the input contains any.
/// </remarks>
/// <param name="param">Text to truncate, optionally containing HTML; <c>null</c> is treated as an empty string.</param>
/// <param name="length">Maximum width, in the units described above, of the visible text to keep.</param>
/// <returns>The truncated text followed by "..." and the closing tags for any elements left open.</returns>
public static string subStringHTML(string param, int length)
{
    string text = param ?? string.Empty;
    System.Text.Encoding encoding = System.Text.Encoding.Default;

    // \G anchors the match at the start index handed to IsMatch, so this tests
    // whether a complete entity ("&name;" or "&#123;") begins exactly at that position.
    Regex entityStart = new Regex(@"\G&#?[0-9A-Za-z]+;");

    StringBuilder result = new StringBuilder();
    int width = 0;
    bool inTag = false;     // between '<' and the matching '>'
    bool inEntity = false;  // between '&' and the terminating ';' of an entity

    for (int i = 0; i < text.Length; i++)
    {
        char ch = text[i];
        bool visible = false;

        if (inTag)
        {
            if (ch == '>')
            {
                inTag = false;
            }
        }
        else if (inEntity)
        {
            if (ch == ';')
            {
                // The whole entity is counted once, on its terminator.
                inEntity = false;
                visible = true;
            }
        }
        else if (ch == '<')
        {
            inTag = true;
        }
        else if (ch == '&' && entityStart.IsMatch(text, i))
        {
            inEntity = true;
        }
        else
        {
            // Includes a bare '&' (e.g. "B&W"), which is ordinary text.
            visible = true;
        }

        if (visible)
        {
            width += encoding.GetByteCount(ch.ToString()) > 1 ? 2 : 1;
        }

        result.Append(ch);

        if (visible && width >= length)
        {
            break;
        }
    }

    result.Append("...");

    // Reduce the kept text to its tags only: drop everything between '>' and the next '<'.
    string tagsOnly = Regex.Replace(result.ToString(), "(>)[^<>]*(<?)", "$1$2");

    // Drop tags that never take (or do not require) a closing tag. The \b stops
    // a short name such as "p" from also matching "pre".
    tagsOnly = Regex.Replace(
        tagsOnly,
        @"</?(?:area|base|basefont|body|br|col|colgroup|dd|dt|frame|head|hr|html|img|input|isindex|li|link|meta|option|p|param|tbody|td|tfoot|th|thead|tr)\b[^<>]*/?>",
        "",
        RegexOptions.IgnoreCase);

    // Drop elements that are already balanced. One pass only removes one
    // nesting level, so repeat until the text stops changing.
    string previous;
    do
    {
        previous = tagsOnly;
        tagsOnly = Regex.Replace(
            tagsOnly,
            @"<([a-zA-Z]+)[^<>]*>(.*?)</\1>",
            "$2",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);
    }
    while (tagsOnly != previous);

    // Whatever opening tags remain were cut off before their closing tag;
    // close them innermost-first.
    MatchCollection openTags = Regex.Matches(tagsOnly, "<([a-zA-Z]+)[^<>]*>");
    for (int i = openTags.Count - 1; i >= 0; i--)
    {
        result.Append("</");
        result.Append(openTags[i].Groups[1].Value);
        result.Append(">");
    }

    return result.ToString();
}
// Extracts cell text from table rows and appends it to richTextBox2.
//   regTR: a row that starts with a bare <tr>, whose cells all end their opening tag with a
//          class attribute of the form td<digits>, mt<digits> or mtt<digits>. The first
//          five cells are skipped; the remaining cells are captured in the "content" group.
//   regTD: the inner content of each <td> that is not whitespace-only.
//   regRe: HTML tags and runs of whitespace, both of which are removed from the cell content.
Regex regTR = new Regex(@"(?is)<tr>\s*(?:<td(?:(?!class=).)*class=([""']?)(?:td\d*|mtt?\d*)\1>(?:(?!</td>).)*</td>\s*){5}(?<content>(?:<td(?:(?!class=).)*class=([""']?)(?:td\d*|mtt?\d*)\2>(?:(?!</td>).)*</td>\s*)+)</tr>", RegexOptions.Compiled);
Regex regTD = new Regex(@"(?is)(?<=<td[^>]*>)(?!\s*</td)(?:(?!</td\b).)*(?=</td>)", RegexOptions.Compiled);
Regex regRe = new Regex(@"<[^>]*>|\s+", RegexOptions.Compiled);
MatchCollection mcTR = regTR.Matches(yourStr);

// Collect the output in memory and assign the control text once. Appending to
// richTextBox2.Text inside the loops re-reads and re-assigns the whole text for every cell.
System.Text.StringBuilder output = new System.Text.StringBuilder();
foreach (Match mTR in mcTR)
{
    foreach (Match mTD in regTD.Matches(mTR.Groups["content"].Value))
    {
        output.Append(regRe.Replace(mTD.Value, "")).Append("\n");
    }
    output.Append("\n------------------------------\n");
}

// Leave the control untouched when no row matched, as the original loop did.
if (output.Length > 0)
{
    richTextBox2.Text += output.ToString();
}
<table style="border-bottom: steelblue thin solid; background-color: #f0f8ff" class="text"
width="920">
<tbody>
<tr>
<td style="width: 2%" class="td3">
<a id="ctl00_cph1_GdvResult_ctl07_linkExapnd" href="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl07$linkExapnd", "", true, "", "", false, true))">+</a></td>
<td style="width: 2%" class="td3">
<input type="image" name="ctl00$cph1$GdvResult$ctl07$imbShow" id="ctl00_cph1_GdvResult_ctl07_imbShow" title="View Detail" src="../Images/edit-record-icon.gif" onclick="javascript:window.open('PopupDiamondDetail.aspx?Pno=71400701&Id=2640','mywin');return false;WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl07$imbShow", "", true, "", "", false, false))" border="0" /></td>
<td style="width: 2%" class="td3">
<input id="ctl00_cph1_GdvResult_ctl07_c" type="checkbox" name="ctl00$cph1$GdvResult$ctl07$c" /></td>
<td style="width: 1%" class="td3">
<a href='../bwcopy/71400701.jpg' target="_blank">
<img src="../forweb/RBC.gif" title="View B&W Copy Of Dimaond" style="border: none"
alt="" /></a></td>
<td style="width: 1%" class="td3">
<input type="image" name="ctl00$cph1$GdvResult$ctl07$imbcol" id="ctl00_cph1_GdvResult_ctl07_imbcol" title="View Color Copy Of Diamond" src="../Images/picimgage.gif" onclick="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl07$imbcol", "", true, "", "", false, false))" border="0" /></td>
<td style="width: 10%" class='mtt'>
71400701
</td>
<td style="width: 6%" class='mtt'>
RBC
</td>
<td id="ctl00_cph1_GdvResult_ctl07_Dwt" style="width: 5%" class="mtt">
0.70
</td>
<td style="width: 8.5%" class='mtt'>
H
</td>
<td style="width: 5%" class='mtt'>
SI1
</td>
<td style="width: 4%" class='mtt'>
VG
</td>
<td id="ctl00_cph1_GdvResult_ctl07_Dprice" style="width: 8%" class="mtt">
1764.00
</td>
<td id="ctl00_cph1_GdvResult_ctl07_Dback" style="width: 6.5%" class="mtt">
-48.12
</td>
<td style="width: 4%" class='mtt'>
<a onclick="javascript:window.open('../Certificate/71400701.jpg');return false;" id="ctl00_cph1_GdvResult_ctl07_linrLab" class="cyanlink" href="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl07$linrLab", "", true, "", "", false, true))">IGI</a></td>
<td style="width: 4%" class='mtt'>
EX
</td>
<td style="width: 4%" class='mtt'>
VG
</td>
<td style="width: 4%" class='mtt'>
N
</td>
<td style="width: 6%" class='mtt'>
62.20
%
</td>
<td style="width: 6%" class='mtt'>
56
%
</td>
<td style="width: 10%" class='mtt'>
5.66
x5.69x3.53
</td>
<td style="width: 2%" title='Diamond On Memo'
class='mt'>
M
</td>
<td style="width: 0.25%" class="td3">
<input type="hidden" name="ctl00$cph1$GdvResult$ctl07$I" id="ctl00_cph1_GdvResult_ctl07_I" value="2640" />
</td>
<td style="width: 0.25%" class="td3">
<input type="hidden" name="ctl00$cph1$GdvResult$ctl07$P" id="ctl00_cph1_GdvResult_ctl07_P" value="71400701" />
</td>
</tr>
</tbody>
</table>
<td style="width: 10%" class='mtt'>
71400701
</td>
<td style="width: 6%" class='mtt'>
RBC
</td>
<td id="ctl00_cph1_GdvResult_ctl07_Dwt" style="width: 5%" class="mtt">
0.70
</td>
<td style="width: 8.5%" class='mtt'>
H
</td>
<td style="width: 5%" class='mtt'>
SI1
</td>
<td style="width: 4%" class='mtt'>
VG
</td>
<td id="ctl00_cph1_GdvResult_ctl07_Dprice" style="width: 8%" class="mtt">
1764.00
</td>
<td id="ctl00_cph1_GdvResult_ctl07_Dback" style="width: 6.5%" class="mtt">
-48.12
</td>
<td style="width: 4%" class='mtt'>
<a onclick="javascript:window.open('../Certificate/71400701.jpg');return false;" id="ctl00_cph1_GdvResult_ctl07_linrLab" class="cyanlink" href="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl07$linrLab", "", true, "", "", false, true))">IGI</a></td>
<td style="width: 4%" class='mtt'>
EX
</td>
<td style="width: 4%" class='mtt'>
VG
</td>
<td style="width: 4%" class='mtt'>
N
</td>
<td style="width: 6%" class='mtt'>
62.20
%
</td>
<td style="width: 6%" class='mtt'>
56
%
</td>
<td style="width: 10%" class='mtt'>
5.66
x5.69x3.53
</td>
<table style="border-bottom: steelblue thin solid; background-color: #f0f8ff" class="text" width="920"><tbody>
<tr>
<td style="width: 2%" class="td3">
<a id="ctl00_cph1_GdvResult_ctl03_linkExapnd" href="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl03$linkExapnd", "", true, "", "", false, true))">+</a></td>
<td style="width: 2%" class="td3">
<input type="image" name="ctl00$cph1$GdvResult$ctl03$imbShow" id="ctl00_cph1_GdvResult_ctl03_imbShow" title="View Detail" src="../Images/edit-record-icon.gif" onclick="javascript:window.open('PopupDiamondDetail.aspx?Pno=73405701&Id=2034','mywin');return false;WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl03$imbShow", "", true, "", "", false, false))" border="0" /></td>
<td style="width: 2%" class="td3">
<input id="ctl00_cph1_GdvResult_ctl03_c" type="checkbox" name="ctl00$cph1$GdvResult$ctl03$c" /></td>
<td style="width: 1%" class="td3">
<a href='../bwcopy/73405701.jpg' target="_blank">
<img src="../forweb/RBC.gif" title="View B&W Copy Of Dimaond" style="border: none"
alt="" /></a></td>
<td style="width: 1%" class="td3">
<input type="image" name="ctl00$cph1$GdvResult$ctl03$imbcol" id="ctl00_cph1_GdvResult_ctl03_imbcol" title="View Color Copy Of Diamond" src="../Images/picimgage.gif" onclick="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl03$imbcol", "", true, "", "", false, false))" border="0" /></td>
<td style="width: 10%" class='td3'>
73405701
</td>
<td style="width: 6%" class='td3'>
RBC
</td>
<td id="ctl00_cph1_GdvResult_ctl03_Dwt" style="width: 5%" class="td3">
0.71
</td>
<td style="width: 8.5%" class='td3'>
F
</td>
<td style="width: 5%" class='td3'>
SI3
</td>
<td style="width: 4%" class='td3'>
VG
</td>
<td id="ctl00_cph1_GdvResult_ctl03_Dprice" style="width: 8%" class="td3">
1342.00
</td>
<td id="ctl00_cph1_GdvResult_ctl03_Dback" style="width: 6.5%" class="td3">
-56.71
</td>
<td style="width: 4%" class='td3'>
<a id="ctl00_cph1_GdvResult_ctl03_linrLab" class="cyanlink" href="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$cph1$GdvResult$ctl03$linrLab", "", true, "", "", false, true))"><font color="Black">NONE</font></a></td>
<td style="width: 4%" class='td3'>
VG
</td>
<td style="width: 4%" class='td3'>
VG
</td>
<td style="width: 4%" class='td3'>
N
</td>
<td style="width: 6%" class='td3'>
63.70
%
</td>
<td style="width: 6%" class='td3'>
58
%
</td>
<td style="width: 10%" class='td3'>
5.59
x5.62x3.58
</td>
<td style="width: 2%" title='Diamond Available'
class='mtt1'>
A
</td>
<td style="width: 0.25%" class="td3">
<input type="hidden" name="ctl00$cph1$GdvResult$ctl03$I" id="ctl00_cph1_GdvResult_ctl03_I" value="2034" />
</td>
<td style="width: 0.25%" class="td3">
<input type="hidden" name="ctl00$cph1$GdvResult$ctl03$P" id="ctl00_cph1_GdvResult_ctl03_P" value="73405701" />
</td>
</tr>
</tbody>
</table>
// Extracts cell text from table rows and appends it to richTextBox2.
//   regTR: a row that starts with a bare <tr>, whose cells all end their opening tag with a
//          class attribute of the form td<digits> or mtt<digits>. The first five cells are
//          skipped; the remaining cells are captured in the "content" group.
//   regTD: the inner content of each <td> that is not whitespace-only.
//   regRe: HTML tags and runs of whitespace, both of which are removed from the cell content.
Regex regTR = new Regex(@"(?is)<tr>\s*(?:<td(?:(?!class=).)*class=([""']?)(?:td\d*|mtt\d*)\1>(?:(?!</td>).)*</td>\s*){5}(?<content>(?:<td(?:(?!class=).)*class=([""']?)(?:td\d*|mtt\d*)\2>(?:(?!</td>).)*</td>\s*)+)</tr>", RegexOptions.Compiled);
Regex regTD = new Regex(@"(?is)(?<=<td[^>]*>)(?!\s*</td)(?:(?!</td\b).)*(?=</td>)", RegexOptions.Compiled);
Regex regRe = new Regex(@"<[^>]*>|\s+", RegexOptions.Compiled);
MatchCollection mcTR = regTR.Matches(yourStr);

// Collect the output in memory and assign the control text once. Appending to
// richTextBox2.Text inside the loops re-reads and re-assigns the whole text for every cell.
System.Text.StringBuilder output = new System.Text.StringBuilder();
foreach (Match mTR in mcTR)
{
    foreach (Match mTD in regTD.Matches(mTR.Groups["content"].Value))
    {
        output.Append(regRe.Replace(mTD.Value, "")).Append("\n");
    }
    output.Append("\n------------------------------\n");
}

// Leave the control untouched when no row matched, as the original loop did.
if (output.Length > 0)
{
    richTextBox2.Text += output.ToString();
}