Lucene 4.5 建立索引报错,再整不出来就要被开除了,谢谢

caohaiming_201302 2013-11-08 09:55:37
分词器 jar 包是 paoding-analysis-4.4.0.jar,在建立索引的时候报错为:

java.lang.IllegalArgumentException: first position increment must be > 0 (got 0) for field 'keyword'
at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:125)
at org.apache.lucene.index.DocFieldProcessor.processDocument(DocFieldProcessor.java:248)
at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:254)
at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:446)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1551)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1221)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1202)
at com.artup.search.builder.imageSearch.ImageSearchAllIndexRebuild.index(ImageSearchAllIndexRebuild.java:117)
at com.artup.search.builder.imageSearch.ImageSearchAllIndexRebuild.startCreateIndex(ImageSearchAllIndexRebuild.java:79)
at com.artup.search.thread.CreateLuceneFileThread.run(CreateLuceneFileThread.java:19)

请问是怎么回事?谢谢
...全文
421 7 打赏 收藏 转发到动态 举报
写回复
用AI写文章
7 条回复
切换为时间正序
请发表友善的回复…
发表回复
zhuopu66yunduan3 2015-03-12
  • 打赏
  • 举报
回复
庖丁不适用于 Lucene 3.x 以后的版本,用 mmseg4j 吧!!!
zw1502071 2014-01-16
  • 打赏
  • 举报
回复
我觉得你调用了两次 addDocument,你试试每次只添加一个 document
huanlin08 2013-11-08
  • 打赏
  • 举报
回复
没那么难吧,我的个人网站 http://www.ablanxue.com 也是用庖丁解牛做分词器。感觉很好用,中文分词很好。 建立索引代码片段 // 索引 String rootlucene = ServletActionContext.getRequest() .getRealPath("/lucenexxx/"); File flucene = null; flucene = new File(rootlucene); if (flucene.exists()) { flucene.mkdir(); } IndexWriter writerlucene = null; try { writerlucene = new IndexWriter(rootlucene, new IKAnalyzer(), false); } catch (IOException e) { // 如果没有索引文件,则创建新索引 writerlucene = new IndexWriter(rootlucene, new IKAnalyzer(), true); } writerlucene.setUseCompoundFile(true); Document doc1 = null; doc1 = new Document(); doc1.add(new Field("jianjie", jianjie, Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); doc1.add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc1.add(new Field("yearmoth", "" + yearmoth, Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); doc1.add(new Field("id", "" + id, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc1.add(new Field("time", time, Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); writerlucene.addDocument(doc1); writerlucene.optimize(); writerlucene.close(); // 索引 搜索
caohaiming_201302 2013-11-08
  • 打赏
  • 举报
回复
完整的代码是: import java.io.File; import java.io.StringReader; import net.paoding.analysis.analyzer.PaodingAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; public class IKIndexAndSearch{ public static void main(String[] args) throws Exception{ //Lucnene Document的字段名 String fieldName = "text"; //检索的内容 String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,"; String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, "; //String text = "国内水"; //String text1 ="国内山水"; //实例化IKAnalyzer分词器 Analyzer analyzer = new PaodingAnalyzer(); //建立内存目录 Directory dir = new RAMDirectory(); //Directory dir = FSDirectory.open(new File("d:/test/index")); //配置IndexWriterConfig IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer); IndexWriter iwriter = new IndexWriter(dir,config); //写入索引 Document doc = new Document(); doc.add(new StringField("ID","10000",Field.Store.YES)); doc.add(new TextField("text",text,Field.Store.YES)); Document doc1 = new Document(); 
doc1.add(new StringField("ID","10001",Field.Store.YES)); doc1.add(new TextField("text",text1,Field.Store.YES)); iwriter.addDocument(doc); iwriter.addDocument(doc1); iwriter.close(); //开始搜索 //实例化搜索器 DirectoryReader ireader = DirectoryReader.open(dir); IndexSearcher isearcher = new IndexSearcher(ireader); //String keyword = "中文分词工具包"; //String keyword = "这是一个中文分词的例子"; String keyword = "国内水"; //使用QueryParser查询分析器构造Query对象 Analyzer analyzera = new PaodingAnalyzer(); QueryParser qp = new QueryParser(Version.LUCENE_45,"text",analyzera); qp.setDefaultOperator(QueryParser.Operator.AND); Query query = qp.parse(keyword); System.out.println("QueryParser:"+query.toString()); //搜索相似度最高的5条记录 TopDocs topDocs = isearcher.search(query, 5); System.out.println("命中:"+topDocs.totalHits); //输出结果 ScoreDoc[] scoreDocs = topDocs.scoreDocs; //高亮设置 SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<B>","</B>"); //设定高亮显示的格式,也就是对高亮显示的词组加上前缀后缀 Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(50)); //设置每次返回的字符数,想必大家在使用搜索引擎的时候也没有一并把全部数据展示出来吧,当然这里也是设定只展示部分数据 for(int i=0;i<topDocs.totalHits;i++){ Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("内容:"+targetDoc.toString()); TokenStream tokenStream = analyzer.tokenStream(fieldName,new StringReader( targetDoc.get(fieldName))); String str = highlighter.getBestFragment(tokenStream, targetDoc.get(fieldName)); System.out.println(str); } ireader.close(); dir.close(); } } 完整错误是: 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker getProperties 信息: config paoding analysis from: 
D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis-default.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analyzer.properties;D:\pro\artup.com\test\bin\paoding-dic-home.properties;D:\pro\artup.com\artup\www_artup\webapps\dic\paoding-dic-names.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives-user.properties 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.CJKKnife 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.LetterKnife 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.NumberKnife Exception in thread "main" java.lang.IllegalArgumentException: first position increment must be > 0 (got 0) for field 'text' at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:125) at org.apache.lucene.index.DocFieldProcessor.processDocument(DocFieldProcessor.java:248) at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:254) at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:446) at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1551) at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1221) at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1202) at IKIndexAndSearch.main(IKIndexAndSearch.java:66) lucene版本是:4.5.1 分词器jar包是paoding-analysis-4.4.0.jar包,这个是不是lucene版本与分词器jar包的冲突,万分感谢
nicholasbobo 2013-11-08
  • 打赏
  • 举报
回复
你把错误完整贴上来吧,看不到是哪里报错
caohaiming_201302 2013-11-08
  • 打赏
  • 举报
回复
String fieldName = "text"; //检索的内容 String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,"; String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, "; //String text = "国内水"; //String text1 ="国内山水"; //实例化IKAnalyzer分词器 Analyzer analyzer = new PaodingAnalyzer(); //建立内存目录 Directory dir = new RAMDirectory(); //Directory dir = FSDirectory.open(new File("d:/test/index")); //配置IndexWriterConfig IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer); IndexWriter iwriter = new IndexWriter(dir,config); //写入索引 Document doc = new Document(); doc.add(new StringField("ID","10000",Field.Store.YES)); doc.add(new TextField("text",text,Field.Store.YES)); Document doc1 = new Document(); doc1.add(new StringField("ID","10001",Field.Store.YES)); doc1.add(new TextField("text",text1,Field.Store.YES)); iwriter.addDocument(doc); iwriter.addDocument(doc1); iwriter.close();
最美的词 2013-11-08
  • 打赏
  • 举报
回复
把你建立索引的代码贴出来,我看看先

81,094

社区成员

发帖
与我相关
我的任务
社区描述
Java Web 开发
社区管理员
  • Web 开发社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧