110,561
社区成员
发帖
与我相关
我的任务
分享
<div class="dTrans cl">
<p><span class="dt">n.</span><span class="dd">u8bd5u9a8cuff1bu8003u9a8cuff1bu6d4bu9a8cuff1bu5316u9a8cuff1b</span></p>
<p><span class="dt">vt.</span><span class="dd">u6d4bu9a8cuff1bu8003u67e5uff1bu8003u9a8cuff1bu52d8u63a2uff1b</span></p>
<p><span class="dt">vi.</span><span class="dd">u53d7u8bd5u9a8cuff1bu53d7u6d4bu9a8cuff1bu53d7u8003u9a8cuff1bu6d4bu5f97u7ed3u679cuff1b</span></p>
</div>
/// <summary>
/// Returns <paramref name="strHtml"/> with every tag-like span removed.
/// </summary>
/// <param name="strHtml">Markup text to strip.</param>
/// <returns>The input with each "&lt;...&gt;" span replaced by an empty string.</returns>
private string stripHtml(string strHtml)
{
    // The pattern lazily consumes from a '<' up to the nearest following '>',
    // crossing line breaks via the explicit "\n" alternative; at least one
    // character is required between the brackets, so "<>" is left alone.
    return Regex.Replace(strHtml, "<(.|\n)+?>", string.Empty);
}
// Read the whole input text file, decoding it as GB2312.
// NOTE(review): on .NET Core / .NET 5+ the GB2312 code page presumably requires
// CodePagesEncodingProvider to be registered first — confirm for the target runtime.
string tempStr = File.ReadAllText(@"C:\Users\myx\Desktop\Test.txt", Encoding.GetEncoding("GB2312"));

// One "uXXXX" escape: the letter 'u' followed by exactly four hex digits.
// Restricting to hex digits (instead of any word character) keeps ordinary text
// such as "usual" from being picked up and then failing the hex parse.
var unicodeEscapePattern = new Regex(@"u([0-9a-f]{4})", RegexOptions.Compiled | RegexOptions.IgnoreCase);

// For every <p><span>V1</span><span>V2</span></p> found after a
// <div class="dTrans cl"> opening tag (with no </div> in between), produce
// V1 followed by the text decoded from the uXXXX escapes in V2.
// Anything in V2 that is not part of an escape is dropped.
var _list = Regex.Matches(tempStr, @"(?i)(?<=<div[^>]*?class=(['""]?)dTrans cl(['""]?)[^>]*?>((?!</div>)[\s\S])*?)<p[^>]*?><span[^>]*?>(?<V1>[^<>]*?)</span>\s*?<span[^>]*?>(?<V2>[^<>]*?)</span>\s*?</p>").Cast<Match>().Select(a =>
{
    var decoded = new StringBuilder();
    foreach (Match m in unicodeEscapePattern.Matches(a.Groups["V2"].Value))
    {
        // Each escape is one UTF-16 code unit; appending it as a char lets two
        // consecutive escapes that form a surrogate pair combine correctly.
        decoded.Append((char)int.Parse(m.Groups[1].Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture));
    }
    return a.Groups["V1"].Value + decoded.ToString();
}).ToList();
/*
[0] "n.试验;考验;测验;化验;" string
[1] "vt.测验;考查;考验;勘探;" string
[2] "vi.受试验;受测验;受考验;测得结果;" string
*/