Lucene 4.5 建立索引报错，再整不出来就要被开除了，谢谢

caohaiming_201302 2013-11-08 09:55:37

分词器 jar 包是 paoding-analysis-4.4.0.jar，在建立索引的时候报错为：

java.lang.IllegalArgumentException: first position increment must be > 0 (got 0) for field 'keyword'
at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:125)
at org.apache.lucene.index.DocFieldProcessor.processDocument(DocFieldProcessor.java:248)
at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:254)
at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:446)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1551)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1221)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1202)
at com.artup.search.builder.imageSearch.ImageSearchAllIndexRebuild.index(ImageSearchAllIndexRebuild.java:117)
at com.artup.search.builder.imageSearch.ImageSearchAllIndexRebuild.startCreateIndex(ImageSearchAllIndexRebuild.java:79)
at com.artup.search.thread.CreateLuceneFileThread.run(CreateLuceneFileThread.java:19)

请问是怎么回事？谢谢

...全文

447 7 打赏收藏转发到动态举报

写回复

用AI写文章

7 条回复

切换为时间正序

请发表友善的回复…

发表回复

zhuopu66yunduan3 2015-03-12

打赏
举报

庖丁分词器不适用于 Lucene 3.x 以后的版本，改用 mmseg4j 吧！

zw1502071 2014-01-16

打赏
举报

我觉得是因为你调用了两次 addDocument，你试试每次只添加一个 document。

huanlin08 2013-11-08

打赏
举报

没那么难吧，我的个人网站 http://www.ablanxue.com 也是用庖丁解牛做分词器。感觉很好用，中文分词很好。建立索引代码片段 // 索引 String rootlucene = ServletActionContext.getRequest() .getRealPath("/lucenexxx/"); File flucene = null; flucene = new File(rootlucene); if (flucene.exists()) { flucene.mkdir(); } IndexWriter writerlucene = null; try { writerlucene = new IndexWriter(rootlucene, new IKAnalyzer(), false); } catch (IOException e) { // 如果没有索引文件，则创建新索引 writerlucene = new IndexWriter(rootlucene, new IKAnalyzer(), true); } writerlucene.setUseCompoundFile(true); Document doc1 = null; doc1 = new Document(); doc1.add(new Field("jianjie", jianjie, Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); doc1.add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc1.add(new Field("yearmoth", "" + yearmoth, Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); doc1.add(new Field("id", "" + id, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc1.add(new Field("time", time, Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); writerlucene.addDocument(doc1); writerlucene.optimize(); writerlucene.close(); // 索引搜索

caohaiming_201302 2013-11-08

打赏
举报

完整的代码是：

import java.io.File;
import java.io.StringReader;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Minimal index-and-search demo: indexes two sample documents into an
 * in-memory directory using {@code PaodingAnalyzer}, then runs one query
 * against the "text" field and prints highlighted fragments of the hits.
 */
public class IKIndexAndSearch {

    /**
     * Builds the in-memory index, searches it and prints the results.
     *
     * @param args unused
     * @throws Exception if indexing, query parsing, searching or highlighting fails
     */
    public static void main(String[] args) throws Exception {
        // Name of the Lucene Document field that is indexed and searched.
        String fieldName = "text";
        // Content to index.
        String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,";
        String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, ";
        //String text = "国内水";
        //String text1 ="国内山水";

        // Instantiate the analyzer (PaodingAnalyzer, despite the class name).
        Analyzer analyzer = new PaodingAnalyzer();

        // Create an in-memory directory.
        Directory dir = new RAMDirectory();
        //Directory dir = FSDirectory.open(new File("d:/test/index"));

        // Configure the IndexWriter.
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        IndexWriter iwriter = new IndexWriter(dir, config);

        // Write the index.
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField("text", text, Field.Store.YES));
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10001", Field.Store.YES));
        doc1.add(new TextField("text", text1, Field.Store.YES));
        // NOTE(review): the reported IllegalArgumentException ("first position
        // increment must be > 0") is thrown from addDocument; presumably the
        // analyzer's token stream starts with a position increment of 0 -
        // confirm against the analyzer version in use.
        iwriter.addDocument(doc);
        iwriter.addDocument(doc1);
        iwriter.close();

        // Start searching: instantiate the searcher.
        DirectoryReader ireader = DirectoryReader.open(dir);
        IndexSearcher isearcher = new IndexSearcher(ireader);

        //String keyword = "中文分词工具包";
        //String keyword = "这是一个中文分词的例子";
        String keyword = "国内水";

        // Build the Query object with the QueryParser.
        Analyzer analyzera = new PaodingAnalyzer();
        QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzera);
        qp.setDefaultOperator(QueryParser.Operator.AND);
        Query query = qp.parse(keyword);
        System.out.println("QueryParser:" + query.toString());

        // Fetch the 5 highest-scoring records.
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("命中：" + topDocs.totalHits);

        // Output the results.
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;

        // Highlighting: wrap matched terms in the given prefix and suffix.
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<B>", "</B>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        // Limit each returned fragment to 50 characters, so only part of the
        // stored text is shown.
        highlighter.setTextFragmenter(new SimpleFragmenter(50));

        // Iterate over the returned hits only: totalHits counts every match
        // and can exceed the number of ScoreDocs actually fetched.
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document targetDoc = isearcher.doc(scoreDoc.doc);
            System.out.println("内容：" + targetDoc.toString());
            TokenStream tokenStream = analyzer.tokenStream(fieldName,
                    new StringReader(targetDoc.get(fieldName)));
            String str = highlighter.getBestFragment(tokenStream, targetDoc.get(fieldName));
            System.out.println(str);
        }

        ireader.close();
        dir.close();
    }
}

完整错误是： 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker getProperties 信息: config paoding analysis from: 
D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis-default.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analyzer.properties;D:\pro\artup.com\test\bin\paoding-dic-home.properties;D:\pro\artup.com\artup\www_artup\webapps\dic\paoding-dic-names.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives-user.properties 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.CJKKnife 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.LetterKnife 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.NumberKnife Exception in thread "main" java.lang.IllegalArgumentException: first position increment must be > 0 (got 0) for field 'text' at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:125) at org.apache.lucene.index.DocFieldProcessor.processDocument(DocFieldProcessor.java:248) at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:254) at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:446) at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1551) at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1221) at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1202) at IKIndexAndSearch.main(IKIndexAndSearch.java:66) lucene版本是：4.5.1 分词器jar包是paoding-analysis-4.4.0.jar包，这个是不是lucene版本与分词器jar包的冲突，万分感谢

nicholasbobo 2013-11-08

打赏
举报

你把错误完整贴上来吧，看不到是哪里报错

caohaiming_201302 2013-11-08

打赏
举报

String fieldName = "text"; //检索的内容 String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,"; String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, "; //String text = "国内水"; //String text1 ="国内山水"; //实例化IKAnalyzer分词器 Analyzer analyzer = new PaodingAnalyzer(); //建立内存目录 Directory dir = new RAMDirectory(); //Directory dir = FSDirectory.open(new File("d:/test/index")); //配置IndexWriterConfig IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer); IndexWriter iwriter = new IndexWriter(dir,config); //写入索引 Document doc = new Document(); doc.add(new StringField("ID","10000",Field.Store.YES)); doc.add(new TextField("text",text,Field.Store.YES)); Document doc1 = new Document(); doc1.add(new StringField("ID","10001",Field.Store.YES)); doc1.add(new TextField("text",text1,Field.Store.YES)); iwriter.addDocument(doc); iwriter.addDocument(doc1); iwriter.close();