全文检索为什么出错?

bigbearcn 2003-10-17 08:49:39
package common.test;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.QueryParser;

import java.io.File;
import java.io.InputStreamReader;
import java.io.FileInputStream;


public class FullTextSearcher {
private String indexPath = "c:/";
private String filePath = "e:/";
private String queryStr = "doc";

public static void main(String[] args) throws Exception{
new FullTextSearcher();
}

public FullTextSearcher() throws Exception{
IndexWriter writer= new IndexWriter(indexPath,new SimpleAnalyzer(),false);
File f = new File(filePath);
File[] fs = f.listFiles();

for(int i=0;i<fs.length;i++) {
Document doc = new Document();
doc.add(Field.UnIndexed("path",fs[i].getName()));
doc.add(Field.Text("content",new InputStreamReader(new FileInputStream(fs[i]))));
writer.addDocument(doc);
}

Searcher searcher = new IndexSearcher(indexPath);
Query query = QueryParser.parse(queryStr,"content",new SimpleAnalyzer());
Hits hits = searcher.search(query);
for(int i=0;i<hits.length();i++) {
System.out.println(hits.doc(i).getField("path") +"\t" + hits.doc(i).getField("content"));
}
}
}
报C:\j2sdk1.4.0_02\bin\javaw.exe -classpath D:\develop;C:\j2sdk1.4.0_02\jre\lib\charsets.jar;C:\j2sdk1.4.0_02\jre\lib\jaws.jar;C:\j2sdk1.4.0_02\jre\lib\jce.jar;C:\j2sdk1.4.0_02\jre\lib\jsse.jar;C:\j2sdk1.4.0_02\jre\lib\rt.jar;C:\j2sdk1.4.0_02\jre\lib\sunrsasign.jar;C:\j2sdk1.4.0_02\jre\lib\ext\dnsns.jar;C:\j2sdk1.4.0_02\jre\lib\ext\ldapsec.jar;C:\j2sdk1.4.0_02\jre\lib\ext\localedata.jar;C:\j2sdk1.4.0_02\jre\lib\ext\sunjce_provider.jar;D:\libs\xerces.jar;D:\libs\jdom-b3.jar;D:\libs\msbase.jar;D:\libs\mssqlserver.jar;D:\libs\msutil.jar;D:\libs\velocity-1.4.jar;D:\libs\servlet.jar;D:\libs\commons-digester.jar;D:\libs\commons-beanutils.jar;D:\libs\commons-collections.jar;D:\libs\commons-logging.jar;C:\Documents and Settings\Administrator\桌面\tiles\lucene-1.2\lucene-1.2.jar;C:\Documents and Settings\Administrator\桌面\tiles\lucene-1.2\lucene-demos-1.2.jar common.test.FullTextSearcher
java.io.IOException: Index locked for write: Lock@C:\write.lock
at org.apache.lucene.index.IndexWriter.<init>(Unknown Source)
at org.apache.lucene.index.IndexWriter.<init>(Unknown Source)
at common.test.FullTextSearcher.<init>(FullTextSearcher.java:28)
at common.test.FullTextSearcher.main(FullTextSearcher.java:24)
Exception in thread "main" Process terminated with exit code 1




...全文
46 9 打赏 收藏 转发到动态 举报
写回复
用AI写文章
9 条回复
切换为时间正序
请发表友善的回复…
发表回复
Jock 2003-10-21
  • 打赏
  • 举报
回复
我这里有个切分单字的,嗬嗬,基于词库的当然是不能给你了!
package org.apache.lucene.analysis.cn;

import java.io.*;

import org.apache.lucene.analysis.*;

public final class ChineseTokenizer
extends Tokenizer {
private int offset;
private int bufferIndex;
private int dataLen;
private static final int MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 1024;
private final char buffer[] = new char[255];
private final char ioBuffer[] = new char[1024];
private int length;
private int start;

public ChineseTokenizer(Reader reader) {
offset = 0;
bufferIndex = 0;
dataLen = 0;
super.input = reader;
}

private final void push(char c) {
if (length == 0) {
start = offset - 1;
}
buffer[length++] = Character.toLowerCase(c);
}

private final void pop() {
length--;
}

private final Token flush() {
if (length > 0) {
return new Token(new String(buffer, 0, length), start,
start + length);
}
else {
return null;
}
}

public final Token next() throws IOException {
length = 0;
start = offset;
Token token = null;
while (token == null) {
offset++;
if (bufferIndex >= dataLen) {
dataLen = super.input.read(ioBuffer);
bufferIndex = 0;
}
if (dataLen == -1) {
token = flush();
if (token == null || token.termText().length() <= 1) {
return null;
}
break;
}
char c = ioBuffer[bufferIndex++];
switch (Character.getType(c)) {
case Character.TITLECASE_LETTER:
case Character.MODIFIER_LETTER:
case Character.NON_SPACING_MARK:
case Character.ENCLOSING_MARK:
case Character.COMBINING_SPACING_MARK:
default:
if (length == 1) {
pop();
}
else if (length > 0) {
token = flush();
}
break;

case Character.UPPERCASE_LETTER:
case Character.LOWERCASE_LETTER:
case Character.DECIMAL_DIGIT_NUMBER:
if (length > 0 &&
Character.getType(buffer[length - 1]) ==
Character.OTHER_LETTER) {
if (length > 1) {
token = flush();
bufferIndex--;
break;
}
else {
pop();
}
}
push(c);
if (length == 255) {
token = flush();
}
break;

case Character.OTHER_LETTER:
if (length > 0) {
if (Character.getType(buffer[length - 1]) ==
Character.OTHER_LETTER) {
push(c);
token = flush();
bufferIndex--;
}
else {
if (length > 1) {
bufferIndex--;
token = flush();
}
else {
pop();
push(c);
}
}
}
else {
push(c);
}
break;
}
}
/*
String temp = token.termText();
try {
System.out.println("index word: "+new String(temp.getBytes("GBK"))+" word length = "+temp.length());
} catch (Exception e) {}
*/
return token;
}
}
Jock 2003-10-21
  • 打赏
  • 举报
回复
有个别的问题,你为什么用SimpleAnalyzer?这个对中文的支持很弱的,分词是根据标点符号切分的,这样的话你必须匹配至少一句话才能查询出来,做搜索的不用我教你吧,分词是很关键的。
Jock 2003-10-21
  • 打赏
  • 举报
回复
程序不用去找问题了,就是这样的。如果库是空的我的也报同样的错误,但是正常的话就没有问题。
warren04 2003-10-21
  • 打赏
  • 举报
回复
c:\\
Jock 2003-10-21
  • 打赏
  • 举报
回复
对了,你有没有看过车东的那篇文章,可以去他的主页上找找,他有一个lucence的opensource;网址好像是www.chedong.com/lucence/;找找看。解决了告诉我如何建一个空的索引库,嗬嗬,我也一直没有解决,不过我有一个很小的库,每次就在这个上面建,不会出错,但是指定一个空的目录不行,而且我还没有彻底弄明白那个三文件的格式和确切意义。
Jock 2003-10-21
  • 打赏
  • 举报
回复
是因为你在执行的时候,lucence的索引库还没有建成吧,这样的话你怎么查询呢?
有几个缺省的文件你看在不在,write.lock;segment;deletable还有一些以“_”开头的文件,是实际存放索引的文件。
exitzhang 2003-10-21
  • 打赏
  • 举报
回复
c://?
是c:\\吧
不过好像c:/也可以啊
lilyheart1977 2003-10-20
  • 打赏
  • 举报
回复
private String indexPath = "c://";
private String filePath = "e://";
private String queryStr = "doc";


记住全部要// 不能是单斜杆
匪六哥 2003-10-17
  • 打赏
  • 举报
回复
IO错误,是否跟读取文件有关?

帮你顶

67,512

社区成员

发帖
与我相关
我的任务
社区描述
J2EE只是Java企业应用。我们需要一个跨J2SE/WEB/EJB的微容器,保护我们的业务核心组件(中间件),以延续它的生命力,而不是依赖J2SE/J2EE版本。
社区管理员
  • Java EE
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧