50,523
社区成员
发帖
与我相关
我的任务
分享
// Sample table cell whose <div> holds the standard number as plain text.
String str="<td width=\"31%\" height=25 valign=\"top\" > <div align=\"left\">ISO 22745-1-2010</div></td>";
// Group 1 captures the text between <div ...> and </div>; it may not contain '<' or '>'.
Matcher m=Pattern.compile("<div[^<>]*>([^<>]*)</div>").matcher(str);
// Only the first match is printed, with surrounding whitespace trimmed.
if(m.find())
System.out.println(m.group(1).trim());
// Sample table cell whose <div> wraps a link carrying the stand_id query parameter.
String str1="<td width=\"34%\" height=25 valign=\"top\" > <div align=\"left\"><a href=\"list_standard_content.asp?stand_id=ISO@22745-1-2010\" target=\"_blank\">";
// Group 1 captures from "stand_id=" up to the closing quote of the href value, so the
// captured text includes the "stand_id=" prefix. The trailing ".*>" is greedy and, without
// DOTALL, runs to the last '>' before the next line terminator.
Matcher m1=Pattern.compile("<td width=\"34%\"[^<>]*>\\s*<div[^<>]*>\\s*<a href=\"[^\"]+(stand_id=[^\"]*)\".*>").matcher(str1);
// Prints group 1 of every match found in str1.
while(m1.find())
System.out.println(m1.group(1).trim());
}
ISO 22745-1-2010
ISO@22745-1-2010
ISO@22745-1-2010
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates pulling values out of multi-line HTML table cells with regular expressions.
 *
 * <p>Three sample cells are matched: one holding a standard number as plain text inside a
 * {@code <div>}, and two holding a link whose {@code href} carries a {@code stand_id} query
 * parameter. For each sample the captured value is trimmed and printed on its own line.
 */
public class TextHtmlRegex {

    /**
     * Pattern tail that captures the {@code stand_id} value of the first link after the cell start.
     * Group 1 stops at the first quote, angle bracket, slash or ampersand, and a closing
     * {@code </a>} must follow somewhere after the parameter.
     */
    private static final String STAND_ID_TAIL =
            ".*?<a\\shref=[^>]*?[?]stand_id=([^\"<>/&]+)[\"&].*?</a>";

    /** Matches the 31%-wide cell; group 1 is the plain text of the first text-only div after it. */
    private static final Pattern STANDARD_NUMBER_CELL =
            cell("31%", ".*?<div[^>]*>([^<>]+)</div>");

    /** Matches the 34%-wide cell; group 1 is the stand_id value of its link. */
    private static final Pattern LINK_CELL_34 = cell("34%", STAND_ID_TAIL);

    /** Matches the 35%-wide cell; group 1 is the stand_id value of its link. */
    private static final Pattern LINK_CELL_35 = cell("35%", STAND_ID_TAIL);

    /**
     * Runs the three sample extractions and prints each captured value.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str1 = "<td width=\"31%\" height=25 valign=\"top\" > \n"
                + "<div align=\"left\">\n"
                + "ISO 22745-1-2010\n"
                + "</div>\n"
                + "</td>";
        String str2 = "<td width=\"34%\" height=25 valign=\"top\" > \n"
                + "<div align=\"left\">\n"
                + "<a href=\"list_standard_content.asp?stand_id=ISO@22745-1-2010\" target=\"_blank\">\n"
                + "<font color=\"#000066\">工业自动化系统和集成.开放技术字典及其应用于主数据.第1部分:综述与基本原则</font>\n"
                + "</a>\n"
                + "</div>\n"
                + "</td>";
        String str3 = "<td width=\"35%\" height=25 valign=\"top\" >\n"
                + "<div align=\"left\">\n"
                + "<a href=\"list_standard_content.asp?stand_id=ISO@22745-1-2010\" target=\"_blank\">\n"
                + "<font color=\"#000066\">Industrial automation systems and integration - Open technical dictionaries and their application to master data - Part 1: Overview and fundamental principles\n"
                + "</font>\n"
                + "</a>\n"
                + "</div>\n"
                + "</td>";

        printFirstGroup(STANDARD_NUMBER_CELL, str1);
        printFirstGroup(LINK_CELL_34, str2);
        printFirstGroup(LINK_CELL_35, str3);
    }

    /**
     * Builds a pattern anchored on a {@code <td width="...">} opening with the given width.
     *
     * <p>The pattern is compiled with {@link Pattern#DOTALL} so that {@code .} also matches line
     * terminators; the markup being searched spans several lines. Gaps use reluctant
     * quantifiers so the capture comes from the nearest candidate after the cell start rather
     * than the last one in the whole input.
     *
     * @param width the literal value of the cell's width attribute, e.g. {@code "31%"}
     * @param tail  the regex fragment that follows the width attribute and defines group 1
     * @return the compiled pattern
     */
    private static Pattern cell(String width, String tail) {
        return Pattern.compile(
                "<td\\swidth=\"" + Pattern.quote(width) + "\"" + tail, Pattern.DOTALL);
    }

    /**
     * Prints the trimmed first capture group of the first match, or nothing when there is no match.
     *
     * @param pattern a pattern that defines at least one capture group
     * @param html    the markup to search
     */
    private static void printFirstGroup(Pattern pattern, String html) {
        Matcher matcher = pattern.matcher(html);
        if (matcher.find()) {
            System.out.println(matcher.group(1).trim());
        }
    }
}