Elasticsearch JavaAPI建立分词索引并统计词频

qq_41030970 2019-04-30 04:40:01
求助各位,在项目中,我创建了一个名为“condition”的index,名为“msg”的type,并且仅仅建立一个数据节点。msg里含有一个字段如下:
字段名 属性 是否分析 属性说明
ID String not_analyzed ID
Content String analyzed 具体内容

现在想针对Content字段做一个词云图,需要对Content进行分词并统计词频,因此我想对msg的数据首先建立一个索引wordcloud,然后再对wordcloud的数据进行二次聚合新建一个索引wordcloudkeyword:
字段名 属性 属性说明
Keyword String 提取出来的关键词
Number long 关键词出现频率

请问这一系列的工作具体应该怎样通过 Java API 实现?包括 wordcloudkeyword 建立完成后如何导入数据、如何将出现频率前100的词及其具体出现次数提取出来传到前端等。本人之前并没有接触过 Elasticsearch,因此希望能得到较为细致的步骤和解释说明,很抱歉耽误您的时间,非常感谢!

...全文
324 3 打赏 收藏 转发到动态 举报
写回复
用AI写文章
3 条回复
切换为时间正序
请发表友善的回复…
发表回复
撸起袖子吹风 2019-12-12
  • 打赏
  • 举报
回复
全文统计:

// Caveat: the aggregated field must have fielddata=true enabled.
        // Match every document and attach a terms aggregation named "hotWord" (top 10 buckets).
        NativeSearchQueryBuilder searchBuilder = new NativeSearchQueryBuilder();
        searchBuilder.withQuery(QueryBuilders.matchAllQuery());
        searchBuilder.addAggregation(
                AggregationBuilders.terms("hotWord").field(fieldName).size(10));

        AggregatedPage<AnalyzerIndex> resultPage =
                (AggregatedPage<AnalyzerIndex>) indexRepository.search(searchBuilder.build());
        Terms hotWords = (Terms) resultPage.getAggregation("hotWord");

        // Print each bucket as "<rank>:<term>=<document count>", with the rank starting at 1.
        int rank = 1;
        for (Terms.Bucket bucket : hotWords.getBuckets()) {
            System.out.println(rank + ":" + bucket.getKey() + "=" + bucket.getDocCount());
            rank++;
        }
撸起袖子吹风 2019-12-12
  • 打赏
  • 举报
回复
单个文档统计


        // Request the term vectors of one document, restricted to the "content" field.
        TermVectorsRequest request = new TermVectorsRequest(index, indexType, id);
        request.setFields("content");
        request.setFieldStatistics(true);
        request.setTermStatistics(true);
        request.setPositions(true);
        request.setOffsets(true);
        request.setPayloads(false);

        // Filter settings that narrow down which terms are returned.
        Map<String, Integer> filterSettings = new HashMap<>();
        filterSettings.put("max_num_terms", 10); // number of terms for the word cloud
        filterSettings.put("min_term_freq", 2); // minimum frequency of the term in this document
        filterSettings.put("max_term_freq", 100);
        filterSettings.put("min_doc_freq", 1); // minimum number of documents in the index containing the term
        filterSettings.put("max_doc_freq", 100);
        filterSettings.put("min_word_length", 2);
        filterSettings.put("max_word_length", 10);
        request.setFilterSettings(filterSettings);

        TermVectorsResponse response = elasticsearchTemplate.getClient().termvectors(request, RequestOptions.DEFAULT);
        List<TermVectorsResponse.TermVector> termVectorList = response.getTermVectorsList();

        // NOTE(review): the list is presumably null when the document is not found or carries
        // no term vectors -- confirm against the client API; guarding avoids an NPE in the loop.
        if (termVectorList != null) {
            for (TermVectorsResponse.TermVector termVector : termVectorList) {
                List<TermVectorsResponse.TermVector.Term> terms = termVector.getTerms();
                if (terms == null) {
                    // Nothing to print for a field without terms.
                    continue;
                }
                for (TermVectorsResponse.TermVector.Term term : terms) {
                    System.out.println("----term:" + term.getTerm() + "  -DocFreq:" + term.getDocFreq() + "  -TermFreq:" + term.getTermFreq());
                }
            }
        }
十八道胡同 2019-04-30
  • 打赏
  • 举报
回复
package org.springframework.data.elasticsearch.core;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesResponse;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexResponse;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsRequest;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequestBuilder;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.get.MultiGetRequestBuilder;
import org.elasticsearch.action.get.MultiGetResponse;
import org.elasticsearch.action.get.MultiGetRequest.Item;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.update.UpdateRequestBuilder;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.cluster.metadata.AliasMetaData;
import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Field;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.ScoreSortBuilder;
import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.core.io.ClassPathResource;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort.NullHandling;
import org.springframework.data.domain.Sort.Order;
import org.springframework.data.elasticsearch.ElasticsearchException;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Mapping;
import org.springframework.data.elasticsearch.annotations.Setting;
import org.springframework.data.elasticsearch.core.aggregation.AggregatedPage;
import org.springframework.data.elasticsearch.core.aggregation.impl.AggregatedPageImpl;
import org.springframework.data.elasticsearch.core.convert.ElasticsearchConverter;
import org.springframework.data.elasticsearch.core.convert.MappingElasticsearchConverter;
import org.springframework.data.elasticsearch.core.facet.FacetRequest;
import org.springframework.data.elasticsearch.core.mapping.ElasticsearchPersistentEntity;
import org.springframework.data.elasticsearch.core.mapping.ElasticsearchPersistentProperty;
import org.springframework.data.elasticsearch.core.mapping.SimpleElasticsearchMappingContext;
import org.springframework.data.elasticsearch.core.query.AliasQuery;
import org.springframework.data.elasticsearch.core.query.CriteriaQuery;
import org.springframework.data.elasticsearch.core.query.DeleteQuery;
import org.springframework.data.elasticsearch.core.query.FetchSourceFilter;
import org.springframework.data.elasticsearch.core.query.GetQuery;
import org.springframework.data.elasticsearch.core.query.IndexBoost;
import org.springframework.data.elasticsearch.core.query.IndexQuery;
import org.springframework.data.elasticsearch.core.query.MoreLikeThisQuery;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.data.elasticsearch.core.query.Query;
import org.springframework.data.elasticsearch.core.query.ScriptField;
import org.springframework.data.elasticsearch.core.query.SearchQuery;
import org.springframework.data.elasticsearch.core.query.SourceFilter;
import org.springframework.data.elasticsearch.core.query.StringQuery;
import org.springframework.data.elasticsearch.core.query.UpdateQuery;
import org.springframework.data.util.CloseableIterator;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

/**
 * Class declaration of {@code ElasticsearchTemplate}, declared to implement
 * {@code ElasticsearchOperations} and {@code ApplicationContextAware}.
 *
 * <p>NOTE(review): the body is empty in this listing; the members were presumably left out
 * when the class was quoted -- consult the full source before relying on it.
 */
public class ElasticsearchTemplate implements ElasticsearchOperations, ApplicationContextAware {
}
Spring 里面有个 ElasticsearchTemplate,你可以参考一下。

81,092

社区成员

发帖
与我相关
我的任务
社区描述
Java Web 开发
社区管理员
  • Web 开发社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧