67,512
社区成员
发帖
与我相关
我的任务
分享
public class IKAnalyzerDemo {
public static void main(String[] args) {
String fieldName = "text";
String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
Analyzer analyzer = new IKAnalyzer(true);
//Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
Directory directory = null;
IndexWriter iwriter = null;
IndexReader ireader = null;
IndexSearcher isearcher = null;
try {
directory = new RAMDirectory();
IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
iwConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
iwriter = new IndexWriter(directory, iwConfig);
Document doc = new Document();
doc.add(new StringField(fieldName, text, Field.Store.YES));
iwriter.addDocument(doc);
iwriter.close();
ireader = DirectoryReader.open(directory);
isearcher = new IndexSearcher(ireader);
// String keyword = "Analyzer";
String keyword = "中文分词工具包";
QueryParser qp = new QueryParser(Version.LUCENE_4_9, fieldName, analyzer);
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
Query query = qp.parse(keyword);
System.out.println("Query = " + query);
TopDocs topDocs = isearcher.search(query, 5);
System.out.println("命中:" + topDocs.totalHits);
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (int i = 0; i < topDocs.totalHits; i++) {
Document targetDoc = isearcher.doc(scoreDocs[i].doc);
System.out.println("内容:" + targetDoc.toString());
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
} finally {
if (ireader != null) {
try {
ireader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (directory != null) {
try {
directory.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}
命中:0
doc.add(new StringField(fieldName, text, Field.Store.YES));
替换为
doc.add(new Field(fieldName, text, Field.Store.YES, Index.ANALYZED));
isearcher = new IndexSearcher(ireader);
System.out.println(isearcher.doc(0).get(fieldName));
这样可以把 text 的内容打印出来，说明数据已经索引了，但是 isearcher.search(query, 5) 却查询不到任何结果！
我猜想这不是中文分词不正确造成的，因为查询英文时结果也一样。甚至把分词器换成 StandardAnalyzer 之后，不管查询中文还是英文，也同样查询不到数据。