62,616
社区成员
发帖
与我相关
我的任务
分享
import java.util.regex.*;
/**
 * Small demo that tidies the spacing of a multi-line sample text and then
 * strips a handful of filler words from it.
 */
public class MyRegex2 {

    /**
     * Builds the sample, normalises its spaces, removes every filler word that
     * is surrounded by single spaces, and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String[] fillers = {"a", "an", "this", "that", "which", "what"};

        String text = "This a xml book.I like xml.\n"
                + "This is a C# book.But I love Java.\n"
                + " This is an SQL book which I like.\n"
                + "This is a C++ book that I like.\n";

        // Squeeze runs of two or more spaces into one, then drop the spaces
        // that open a line ((?m) lets ^ match after every line break).
        text = text.replaceAll(" {2,}", " ").replaceAll("(?m)^ +", "");

        // The match is case-sensitive and needs a space on both sides, so a
        // capitalised or line-initial word is left untouched.
        for (String filler : fillers) {
            text = text.replaceAll(" " + filler + " ", " ");
        }

        System.out.println(text);
    }
}
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that strips English stop words (and any character that is not a
 * letter, whitespace or hyphen) from a sentence and prints how long the
 * call took.
 */
public class Test {

    /** Words removed from the input, matched case-insensitively. */
    private static final String[] STOP_WORDS = {
        "a", "an", "and", "for", "in", "the", "this", "that", "which", "what"
    };

    /**
     * Compiled stop-word expression. A {@link Pattern} is immutable and can be
     * shared freely; a matcher is created per call instead of keeping one
     * mutable {@code Matcher} in a static field.
     */
    private static final Pattern STOP_WORD_PATTERN = toRegex(STOP_WORDS);

    /** Matches every character that is not an ASCII letter, whitespace or a hyphen. */
    private static final Pattern REMOVE_OTHER = Pattern.compile("[^a-zA-Z\\s-]");

    /**
     * Runs the stop-word filter on a sample title, then prints the filtered
     * text followed by the elapsed time in nanoseconds.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        String str = "An Instant and, Accurate Estimation Method for Joins and Selection in a Retrieval-Intensive Environment";
        long t0 = System.nanoTime();
        String s1 = processStopWord(str);
        long t1 = System.nanoTime();
        System.out.println(s1);
        System.out.println(t1 - t0);
    }

    /**
     * Removes unwanted characters first, then every stop word together with
     * the whitespace that follows it.
     *
     * @param statement text to clean
     * @return the text without stop words
     */
    private static String processStopWord(String statement) {
        String lettersOnly = REMOVE_OTHER.matcher(statement).replaceAll("");
        return STOP_WORD_PATTERN.matcher(lettersOnly).replaceAll("");
    }

    /**
     * Assembles the stop-word regular expression.
     *
     * @param stopWords words to match; each one is quoted, so it is treated as literal text
     * @return the compiled pattern
     */
    private static Pattern toRegex(String[] stopWords) {
        // A plain \b also fires next to '-', which would cut stop words out of
        // hyphenated compounds ("state-of-the-art" -> "state-of--art"). The
        // lookarounds require that neither neighbour is a word character or a hyphen.
        StringBuilder sb = new StringBuilder("(?<![\\w-])(?i:");
        for (int i = 0; i < stopWords.length; i++) {
            if (i > 0) {
                sb.append('|');
            }
            sb.append(Pattern.quote(stopWords[i]));
        }
        sb.append(")(?![\\w-])\\s*");
        // Ignoring the quoting, the assembled expression is:
        //   (?<![\w-])(?i:a|an|and|for|in|the|this|that|which|what)(?![\w-])\s*
        // Factoring out shared prefixes would probably match faster (not measured), e.g.:
        //   (?<![\w-])(?i:a(?:nd?)?|for|in|th(?:e|is|at)|wh(?:ich|at))(?![\w-])\s*
        return Pattern.compile(sb.toString());
    }
}
import java.util.regex.*;
/**
 * Demo that prints, for each line of a sample text, the phrase that follows
 * the article " a " and ends with the word "book".
 */
public class MyRegex2 {

    /**
     * The match must start right after " a " (lookbehind, so the article is
     * not part of the match) and runs to the last "book" on the same line
     * that is followed by a word boundary.
     */
    private static final Pattern AFTER_ARTICLE =
            Pattern.compile("(?m)\\b(?<= a )(.*)book\\b");

    /**
     * Normalises the spacing of the sample and prints every match on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String text = "This a xml book.I like xml.\n"
                + "This is a C# book.But I love Java.\n"
                + " This is a SQL book.\n"
                + "This is a C++ book.\n";

        // Collapse repeated spaces, then drop spaces at the start of each line.
        text = text.replaceAll(" {2,}", " ").replaceAll("(?m)^ +", "");

        for (Matcher hit = AFTER_ARTICLE.matcher(text); hit.find(); ) {
            System.out.println(hit.group());
        }
    }
}
import java.util.regex.*;
/**
 * Demo that prints, for each line of a sample text, the phrase running from
 * a word-initial "a" to the word "book".
 */
public class MyRegex2 {

    /**
     * Starts at an "a" that sits on a word boundary and extends to the last
     * "book" on the same line that is followed by a word boundary.
     */
    private static final Pattern ARTICLE_TO_BOOK =
            Pattern.compile("(?m)\\ba(.*)book\\b");

    /**
     * Normalises the spacing of the sample and prints every match on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String text = "This a xml book.I like xml.\n"
                + "This is a C# book.But I love Java.\n"
                + " This is a SQL book.\n"
                + "This is a C++ book.\n";

        // Collapse repeated spaces, then drop spaces at the start of each line.
        text = text.replaceAll(" {2,}", " ").replaceAll("(?m)^ +", "");

        for (Matcher hit = ARTICLE_TO_BOOK.matcher(text); hit.find(); ) {
            System.out.println(hit.group());
        }
    }
}