110,534
社区成员
发帖
与我相关
我的任务
分享
// NOTE(review): the three statements below declare the same local (str3); they read as
// alternative variants of one filter, so only one of them can live in a given scope.

// Variant 1: removes every character that is not an ASCII letter, an ASCII digit,
// a character in the range U+4E00..U+9FA5, or whitespace.
string str3 = Regex.Replace("1121212abc ,,,,,中文。eee.abc, ", @"[^a-zA-Z0-9\u4e00-\u9fa5\s]", "");
// Variant 2: removes every character that is neither whitespace (\s) nor a word character (\w).
// The a-zA-Z0-9 ranges are already covered by \w, which in .NET also matches non-ASCII
// letters and the underscore, so the kana, the Chinese text and '_' are kept.
string str3 = Regex.Replace("1121212a_bc ,,,,にほん,中文。eee.abc, ", @"[^a-zA-Z0-9\s\w]", "");
// Variant 3: the negated class contains \W, so every non-word character (punctuation included)
// is kept. Only word characters other than ASCII letters, ASCII digits and '_' are removed,
// i.e. the kana and the Chinese text in this sample.
string str3 = Regex.Replace("1121212a_bc ,,,,にほん,中文。eee.abc, ", @"[^a-zA-Z0-9\s\W_]", "");
// Matches an href attribute together with its quoted value, e.g. href="http://example.com/a".
// Values containing a quote character, '#' or '>' are not matched.
static readonly Regex regexLink = new Regex(@"(href|HREF)[ ]*=[ ]*[""'][^""'#>]+[""']", RegexOptions.Compiled);
// Matches e-mail-like tokens: local part, '@', domain, and a 2-6 letter final label.
// The pattern lists only upper-case ranges, so it must run case-insensitively;
// without RegexOptions.IgnoreCase lower-case addresses would never match.
static readonly Regex regexEmail = new Regex(@"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,6}", RegexOptions.Compiled | RegexOptions.IgnoreCase);
/// <summary>
/// Collects the text of every <c>regexLink</c> match found in <paramref name="html"/>.
/// </summary>
/// <param name="html">Text to scan.</param>
/// <returns>
/// The matched substrings in match order. Each entry is the whole matched text
/// (attribute name, equals sign and quoted value), not just the value.
/// If an exception is thrown while matching, its message is written to the console
/// and whatever was collected up to that point is returned.
/// </returns>
public static string[] ExtractLinks(string html) {
    Collection<string> found = new Collection<string>();
    try {
        foreach (Match hit in regexLink.Matches(html)) {
            found.Add(hit.Value);
        }
    } catch (Exception failure) {
        // Best-effort: report the problem and fall through to return what we have.
        Console.WriteLine(failure.Message);
    }
    return found.ToArray();
}
/// <summary>
/// Collects the text of every <c>regexEmail</c> match found in <paramref name="html"/>.
/// </summary>
/// <param name="html">Text to scan.</param>
/// <returns>
/// The matched substrings in match order. If an exception is thrown while matching,
/// its message is written to the console and whatever was collected up to that point
/// is returned.
/// </returns>
public static string[] ExtractEmails(string html) {
    Collection<string> emails = new Collection<string>();
    try {
        // No per-match delay: this loop only copies strings, so sleeping here
        // just blocked the caller in proportion to the number of matches.
        foreach (Match match in regexEmail.Matches(html)) {
            emails.Add(match.Value);
        }
    } catch (Exception ex) {
        // Best-effort: report the problem and fall through to return what we have.
        Console.WriteLine(ex.Message);
    }
    return emails.ToArray();
}
// Removes the characters ~ , ^ . [ 。 - and ! from the input.
// The hyphen and the opening bracket are escaped so they are read as literals: unescaped,
// "。-!" is parsed as a character range whose end precedes its start, which the regex
// parser rejects with an ArgumentException.
string str3 = Regex.Replace("11~21[212a-b^c ,,,,,中文。eee.abc, ", @"[~,^.,\[。\-!]", "");
// Keeps only ASCII digits, characters in the range U+4E00..U+9FA5 and whitespace;
// everything else (ASCII letters included) is removed.
string str3 = Regex.Replace("1121212abc ,,,,,中文。eee.abc, ", @"[^0-9\u4e00-\u9fa5\s]", "");
// Building blocks for character classes:
//\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b Chinese punctuation marks
//\p{P} Western punctuation (NOTE(review): \p{P} is the general Unicode punctuation category - confirm the intended scope)
//\f\n\r\t\v\x85\p{Z} spaces and other whitespace/separator characters