62,635
社区成员




// Input string shared by the benchmark methods below; it is assigned in the MySplit() constructor.
String str = null;
/**
* 生成一个字符串
*/
public MySplit() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 1000; i++) {
sb.append(i);
sb.append(";");
}
str = sb.toString();
}
/**
 * Splits {@code str} on ';' with {@link String#split(String)}, ten times in a row.
 * The resulting arrays are discarded; only the work matters.
 */
public void strSplit() {
    int remaining = 10;
    while (remaining > 0) {
        str.split(";");
        remaining--;
    }
}
/**
 * Walks over every ';'-separated token of {@code str} with a
 * {@link StringTokenizer}, ten times in a row. The tokens are discarded.
 */
public void strTokenizer() {
    StringTokenizer tokens = new StringTokenizer(str, ";");
    int pass = 0;
    while (pass < 10) {
        // Drain the current tokenizer completely.
        while (tokens.hasMoreTokens()) {
            tokens.nextToken();
        }
        // A tokenizer cannot be rewound, so a fresh one is created for the next pass.
        tokens = new StringTokenizer(str, ";");
        pass++;
    }
}
/**
 * Splits {@code str} on ';' by hand with indexOf and substring, ten times in a row.
 * The tokens are discarded; only the work matters.
 *
 * <p>The scan advances a start offset through the unchanged string and
 * extracts each token with {@code substring(start, idx)}. The previous
 * version instead replaced the working string with a copy of everything
 * after the separator on every step, which never produced a token and
 * re-copied the remaining text once per separator.
 */
public void strIndexOf() {
    for (int pass = 0; pass < 10; pass++) {
        int start = 0;
        int idx;
        while ((idx = str.indexOf(';', start)) >= 0) {
            // The token between the previous separator and this one.
            str.substring(start, idx);
            start = idx + 1;
        }
    }
}
// Note: "|" is a regex metacharacter (alternation), so split("|") does not split on a
// literal pipe; it matches the empty string between characters. On Java 8+ this is
// expected to yield one element per character (6 here) - TODO confirm on the target JDK.
// Use split("\\|") to split on a literal '|'.
String a = "|aa|bb";
a.split("|").length
// What is the result?
package splite;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.StringTokenizer;
/**
*
* @author zys59三仙半(QQ:597882752)<br>
* 创建时间:2015年6月15日 上午8:28:29
*/
public class SplitTest {
/**
* @param args
*/
public static void main(String[] args) {
String str = null;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 1000; i++) {
sb.append(i);
sb.append(";");
}
str = sb.toString();
SplitTest test = new SplitTest();
// 使用split分割(61ms)80
long s = Calendar.getInstance().getTimeInMillis();
for (int i = 0; i < 1000; i++) {
str.split(";");
}
long e = Calendar.getInstance().getTimeInMillis();
System.out.println(e - s);
// 使用StringTokenizer类分割(73ms)93
s = Calendar.getInstance().getTimeInMillis();
for (int i = 0; i < 1000; i++) {
test.strTokenizer(str);
}
e = Calendar.getInstance().getTimeInMillis();
System.out.println(e - s);
// 使用indexOf和substring手工编码方式分割(13548ms)12540
s = Calendar.getInstance().getTimeInMillis();
for (int i = 0; i < 1000; i++) {
test.strIndexOf(str);
}
e = Calendar.getInstance().getTimeInMillis();
System.out.println(e - s);
// 55ms//48
s = Calendar.getInstance().getTimeInMillis();
String[] r = null;
for (int i = 0; i < 1000; i++) {
r = test.mySplite(str);
}
e = Calendar.getInstance().getTimeInMillis();
System.out.println(e - s);
//这块是验证一下mySplite()方法的正确性
System.out.println("=========\n分割后项数:" + r.length);
for (int i = 0; i < r.length; i++) {
System.out.println(r[i] + "\t");
}
}
/**
* 分割字符串。
*
* @param src
* @return
*/
public String[] mySplite(String src) {
ArrayList<String> tmp = new ArrayList<String>();
int s = 0;// ,e=0
for (int i = 0; i < src.length(); i++) {
if (src.charAt(i) == ';') {
tmp.add(src.substring(s, i));
s = i + 1;
}
}
String[] result = new String[tmp.size()];
return tmp.toArray(result);
}
/**
* 使用split分割
*/
public void strSplit(String str) {
// for (int i = 0; i < 10; i++) {
str.split(";");
// }
}
/**
* 使用StringTokenizer类分割
*/
public void strTokenizer(String str) {
StringTokenizer st = new StringTokenizer(str, ";");
// for (int i = 0; i < 10; i++) {
while (st.hasMoreTokens())
st.nextToken();
st = new StringTokenizer(str, ";");
// }
}
/**
* 使用indexOf和substring手工编码方式分割
*/
public void strIndexOf(String str) {
String tmp = str;
for (int i = 0; i < 10; i++) {
while (true) {
int idx = tmp.indexOf(';');
if (idx < 0)
break;
tmp = new String(tmp.substring(idx + 1));
}
tmp = str;
}
}
}
我没去看API的源代码,猜测一下,strTokenizer()和split()方法跟我的实现方式应该是一样的,只是它们需要考虑的因素要多一些,也就多一些判断,所以,比mySplite()稍微慢一点儿。而strIndexOf()慢,不是因为indexOf(),而是反复使用了new String()。
代码测试是没有问题的,原因是纯猜测,欢迎大家批评,嘿嘿。tmp = new String(tmp.substring(idx + 1));
得到的
2;3;4;5;
3;4;5;
4;5;
5;
而你想要的应该是1;
2;
3;
4;
5;
也就是说你的结果根本不符合你的要求,
其次, 由于下面这行代码:
tmp = new String(tmp.substring(idx + 1));
产生的临时变量更费内存, 所以需要更多的时间去分配内存, 还有可能会产生更频繁的换页, 导致效率低下。不过这仅仅是有可能: 这些临时变量很快就会被丢弃, 可能在换页之前就已被回收; 即使发生换页, 换出去的也可能是那些还没回收的变量, 而不见得是那些有用的变量。tmp = new String(tmp.substring(旧的idx+1, idx));
没有测试, 应该是这样的tmp = new String(tmp.substring(旧的idx, idx + 1));
tmp = new String(tmp.substring(idx + 1));
是干嘛呢?不是截取吗?那不该是
tmp = new String(tmp.substring(旧的idx, idx + 1));
/**
 * Splits this string around matches of the given regular expression.
 *
 * <p>When {@code regex} is effectively a single literal character (see the
 * fastpath comment below) the string is scanned directly with indexOf and
 * substring; otherwise the work is delegated to
 * {@code Pattern.compile(regex).split(this, limit)}.
 *
 * <p>In the fastpath, {@code limit} behaves as follows: a positive limit
 * caps the result at {@code limit} elements, with the last element holding
 * the rest of the string; a limit of zero removes trailing empty strings;
 * a negative limit applies no cap and keeps trailing empty strings.
 *
 * @param regex the delimiting regular expression
 * @param limit the result threshold, as described above
 * @return the array of substrings; a one-element array containing this
 *         string when the fastpath finds no match
 */
public String[] split(String regex, int limit) {
/* fastpath if the regex is a
(1)one-char String and this character is not one of the
RegEx's meta characters ".$|()[{^?*+\\", or
(2)two-char String and the first char is the backslash and
the second is not the ascii digit or ascii letter.
*/
char ch = 0;
// Each "((ch - lo) | (hi - ch)) < 0" test below is true exactly when ch lies
// outside [lo, hi]: one of the two differences is then negative, which sets the
// sign bit of the OR. The final clause rejects surrogate code units.
// NOTE(review): regex.value / value are presumably the backing char array of
// the String, declared outside this view.
if (((regex.value.length == 1 &&
".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
(regex.length() == 2 &&
regex.charAt(0) == '\\' &&
(((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
((ch-'a')|('z'-ch)) < 0 &&
((ch-'A')|('Z'-ch)) < 0)) &&
(ch < Character.MIN_HIGH_SURROGATE ||
ch > Character.MAX_LOW_SURROGATE))
{
// off is the start of the current segment; next is the index of the next delimiter.
int off = 0;
int next = 0;
boolean limited = limit > 0;
ArrayList<String> list = new ArrayList<>();
while ((next = indexOf(ch, off)) != -1) {
if (!limited || list.size() < limit - 1) {
list.add(substring(off, next));
off = next + 1;
} else { // last one
//assert (list.size() == limit - 1);
list.add(substring(off, value.length));
off = value.length;
break;
}
}
// If no match was found, return this
if (off == 0)
return new String[]{this};
// Add remaining segment
if (!limited || list.size() < limit)
list.add(substring(off, value.length));
// Construct result
int resultSize = list.size();
// With limit == 0, trailing empty strings are dropped from the result.
if (limit == 0)
while (resultSize > 0 && list.get(resultSize - 1).length() == 0)
resultSize--;
String[] result = new String[resultSize];
return list.subList(0, resultSize).toArray(result);
}
// General case: compile the regex and let Pattern do the split.
return Pattern.compile(regex).split(this, limit);
}