单个文档统计
// Term statistics for a single document: asks Elasticsearch for the term vectors of the
// "content" field of document `id` and prints each returned term with its document
// frequency and its in-document term frequency.
TermVectorsRequest request = new TermVectorsRequest(index, indexType, id);
request.setFields("content");
request.setFieldStatistics(true);
request.setTermStatistics(true);
// Positions and offsets are requested so per-token data is available through
// term.getTokens(); the loop below does not print them.
request.setPositions(true);
request.setOffsets(true);
request.setPayloads(false);

// Filter settings restrict which terms are returned.
Map<String, Integer> filterSettings = new HashMap<>();
filterSettings.put("max_num_terms", 10);   // maximum number of terms returned (word-cloud size)
filterSettings.put("min_term_freq", 2);    // minimum frequency of the term in this document
filterSettings.put("max_term_freq", 100);  // maximum frequency of the term in this document
filterSettings.put("min_doc_freq", 1);     // minimum number of documents in the index containing the term
filterSettings.put("max_doc_freq", 100);   // maximum number of documents in the index containing the term
filterSettings.put("min_word_length", 2);  // shortest term length kept
filterSettings.put("max_word_length", 10); // longest term length kept
request.setFilterSettings(filterSettings);

TermVectorsResponse response = elasticsearchTemplate.getClient().termvectors(request, RequestOptions.DEFAULT);

// The term-vector list is optional in the response (e.g. the document was not found or the
// field has no term vectors), so guard against null before iterating.
List<TermVectorsResponse.TermVector> termVectorList = response.getTermVectorsList();
if (termVectorList != null) {
    for (TermVectorsResponse.TermVector termVector : termVectorList) {
        List<TermVectorsResponse.TermVector.Term> terms = termVector.getTerms();
        if (terms == null) {
            // No terms were returned for this field; nothing to print.
            continue;
        }
        for (TermVectorsResponse.TermVector.Term term : terms) {
            System.out.println("----term:" + term.getTerm() + " -DocFreq:" + term.getDocFreq() + " -TermFreq:" + term.getTermFreq());
        }
    }
}