数据格式
一条特色检测包含以下字段:
sort_index: 活跃度分值(索引中的字段名为 sortIndex);
tInfoId: 公司id(索引中的字段名为 tInfoid);
id: id;
title: 标题;
companyName: 机构名称(索引中的字段名为 infoName);
需求描述
特色检测列表按活跃度分值由高到低进行排序,1个机构仅显示1条特色检测;对分组后的结果分页展示,每页展示10条。
需求分析
1.根据tInfoId进行分组,分组后的结果(t10001,t10002,t10003等等);
2.取出每组内最大的sort_index,例如 机构id为t10001的特色检测最大sort_index为5000,以此类推;
3.按最大sort_index(第二步计算结果)倒序排序;
4. 取出每个tInfoId组内对应最大sort_index的最新的一条特色检测id
es搜索聚合语句分步进行
1.先对tInfoid进行分组
GET woyaoce_index_service_new/_search
{
"size": 0,
"query": {
"match_all": { }
},
"aggs": {
"infoIdCount": {
"cardinality": { //对tInfoid去重后计算总数
"field": "tInfoid"
}
},
"tInfoid": { //tInfoid是自定义名称
"terms": {
"field": "tInfoid", //分组字段为:tinfoid, 进行分组
"size": 100000
}
}
}
}
结果:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"tInfoid" : {
"doc_count_error_upper_bound" : 415,
"sum_other_doc_count" : 25018,
"buckets" : [
{
"key" : "T100186",
"doc_count" : 1661
},
{
"key" : "T132369",
"doc_count" : 915
},
{
"key" : "T134837",
"doc_count" : 900
}
]
}
}
}
2. 计算出每组内最大的sort index
GET woyaoce_index_service_new/_search
{
"size": 0,
"query": {
"match_all": { }
},
"aggs": {
"infoIdCount": {
"cardinality": { //对tInfoid去重后计算总数
"field": "tInfoid"
}
},
"tInfoid": {
"terms": {
"field": "tInfoid",
"size": 3
},
"aggs": {
"max_sort_index": { //max_sort_index是自定义名称
"max": { //计算每个组内最大的sortindex
"field": "sortIndex"
}
}
}
}
}
}
结果:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"tInfoid" : {
"doc_count_error_upper_bound" : 415,
"sum_other_doc_count" : 25018,
"buckets" : [
{
"key" : "T100186",
"doc_count" : 1661,
"max_sort_index" : {
"value" : 45112.0 //最大的sortindex
}
},
{
"key" : "T132369",
"doc_count" : 915,
"max_sort_index" : {
"value" : 42675.0
}
},
{
"key" : "T134837",
"doc_count" : 900,
"max_sort_index" : {
"value" : 39772.0
}
}
]
}
}
}
3. 对每个组内的元数据进行操作,取出分组最大sortindex对应的最新特色检测id和机构名称
GET woyaoce_index_service_new/_search
{
"size": 0,
"query": {
"match_all": { }
},
"aggs": {
"infoIdCount": {
"cardinality": { //对tInfoid去重后计算总数
"field": "tInfoid"
}
},
"tInfoid": {
"terms": {
"field": "tInfoid",
"size": 3
},
"aggs": {
"max_sort_index": {
"max": {
"field": "sortIndex"
}
},
"max_id": { //max_id自定义名称
"top_hits": { //操作每组的元数据
"sort": [
{
"sortIndex": {
"order": "desc" //按sortindex对元数据进行倒序排序
}
},
{
"id": {
"order": "desc"
}
}
],
"size": 1, //最大的sortindex下对应多条特色检测,只取第一条特色检测
"_source": {
"includes": [ // 只取特色检测中的id和infoName字段
"id",
"infoName"
]
}
}
}
}
}
}
}
结果:
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"tInfoid" : {
"doc_count_error_upper_bound" : 444,
"sum_other_doc_count" : 25918,
"buckets" : [
{
"key" : "T100186",
"doc_count" : 1661,
"max_id" : {
"hits" : {
"total" : {
"value" : 1661,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "woyaoce_index_service_new",
"_type" : "_doc",
"_id" : "38665",
"_score" : null,
"_source" : {
"infoName" : "上海微谱化工技术服务有限公司",
"id" : 38665
},
"sort" : [
45112
]
}
]
}
},
"max_sort_index" : {
"value" : 45112.0
}
},
{
"key" : "T132369",
"doc_count" : 915,
"max_id" : {
"hits" : {
"total" : {
"value" : 915,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "woyaoce_index_service_new",
"_type" : "_doc",
"_id" : "37680",
"_score" : null,
"_source" : {
"infoName" : "斯坦德检测集团股份有限公司 ",
"id" : 37680
},
"sort" : [
42675
]
}
]
}
},
"max_sort_index" : {
"value" : 42675.0
}
}
]
}
}
}
4. 对结果进行排序和分页
GET woyaoce_index_service_new/_search
{
"size": 0,
"query": {
"match_all": { }
},
"aggs": {
"infoIdCount": {
"cardinality": { //对tInfoid去重后计算总数
"field": "tInfoid"
}
},
"tInfoid": {
"terms": {
"field": "tInfoid",
"size": 2
},
"aggs": {
"max_sort_index": {
"max": {
"field": "sortIndex"
}
},
"max_id": {
"top_hits": {
"sort": [
{
"sortIndex": {
"order": "desc"
}
},
{
"id": {
"order": "desc"
}
}
],
"_source": {
"includes": [
"id",
"infoName"
]
},
"size": 1
}
},
"sort_page": { // sort_page 自定义名称
"bucket_sort": { //进行排序和分页
"sort": [
{
"max_sort_index": { //按第2步计算出的最大sortindex倒序排序
"order": "desc"
}
}
],
"from": 0, //起始索引
"size": 10 //每页大小
}
}
}
}
}
}
结果:
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"tInfoid" : {
"doc_count_error_upper_bound" : 444,
"sum_other_doc_count" : 25918,
"buckets" : [
{
"key" : "T100186",
"doc_count" : 1661,
"max_id" : {
"hits" : {
"total" : {
"value" : 1661,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "woyaoce_index_service_new",
"_type" : "_doc",
"_id" : "38665",
"_score" : null,
"_source" : {
"infoName" : "上海微谱化工技术服务有限公司",
"id" : 38665
},
"sort" : [
45112
]
}
]
}
},
"max_sort_index" : {
"value" : 45112.0
}
},
{
"key" : "T132369",
"doc_count" : 915,
"max_id" : {
"hits" : {
"total" : {
"value" : 915,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "woyaoce_index_service_new",
"_type" : "_doc",
"_id" : "37680",
"_score" : null,
"_source" : {
"infoName" : "斯坦德检测集团股份有限公司 ",
"id" : 37680
},
"sort" : [
42675
]
}
]
}
},
"max_sort_index" : {
"value" : 42675.0
}
}
]
}
}
}
完整的搜索语句
GET woyaoce_index_service_new/_search
{
"size": 0,
"query": {
"match_all": { }
},
"aggs": {
"tInfoid": {
"terms": {
"field": "tInfoid",
"size": 100000
},
"aggs": {
"max_sort_index": {
"max": {
"field": "sortIndex"
}
},
"max_id": {
"top_hits": {
"sort": [
{
"sortIndex": {
"order": "desc"
}
},
{
"id": {
"order": "desc"
}
}
],
"_source": {
"includes": [
"id",
"infoName"
]
},
"size": 1
}
},
"sort_page": {
"bucket_sort": {
"sort": [
{
"max_sort_index": {
"order": "desc"
}
}
],
"from": 0,
"size": 10
}
}
}
}
}
}
Java实现
/**
 * Pages through the "featured test service" list shown on the buyer home page.
 *
 * <p>Documents are grouped by {@code tInfoid} (one bucket per organisation), each bucket
 * contributes only its top document (highest {@code sortIndex}, ties broken by highest
 * {@code id}), and the buckets are ordered by their maximum {@code sortIndex} descending
 * before the requested page is cut out with a {@code bucket_sort} pipeline aggregation.
 *
 * <p>The total is taken from a cardinality aggregation on {@code tInfoid}.
 * NOTE(review): Elasticsearch cardinality counts are approximate above the precision
 * threshold, so {@code total}/{@code pages} may drift slightly on large indices - confirm
 * this is acceptable for the UI.
 *
 * @param page  1-based page number; {@code null} or values below 1 are treated as 1
 * @param limit page size; {@code null} or values below 1 fall back to 10
 * @return the page of documents; if the search request fails with an {@link IOException}
 *         the stack trace is printed and the result is returned without totals or rows
 * @author yangmin
 * @date 2022/1/11 15:50
 */
public PageResult<TestServiceIndexDocumentNew> getBuyerIndexTestServiceList(Integer page, Integer limit) {
    PageResult<TestServiceIndexDocumentNew> pageResult = new PageResult<>();

    // Normalise the paging input up front: unboxing a null Integer throws, a page below 1
    // yields a negative "from" that Elasticsearch rejects, and a zero limit would divide
    // by zero when the page count is computed.
    int pageNo = (page == null || page < 1) ? 1 : page;
    int pageSize = (limit == null || limit < 1) ? 10 : limit;
    int from = (pageNo - 1) * pageSize;

    SearchRequest searchRequest = new SearchRequest(indexName);
    SearchSourceBuilder builder = new SearchSourceBuilder();
    builder.query(QueryBuilders.matchAllQuery());
    // Only aggregation results are needed, so no top-level hits are requested.
    builder.size(0);

    // Distinct count of tInfoid, used as the total number of groups.
    builder.aggregation(AggregationBuilders.cardinality("infoIdCount").field("tInfoid"));

    // Group by tInfoid. Every group has to be collected because the ordering by
    // max_sort_index is only applied afterwards by the bucket_sort pipeline.
    TermsAggregationBuilder infoIdAgg = AggregationBuilders.terms("tInfoid").field("tInfoid").size(Integer.MAX_VALUE);

    // Highest sortIndex inside each group; this is the bucket ordering key.
    infoIdAgg.subAggregation(AggregationBuilders.max("max_sort_index").field("sortIndex"));

    // Representative document of each group: highest sortIndex first, then highest id.
    List<SortBuilder<?>> sortList = new ArrayList<>(2);
    sortList.add(SortBuilders.fieldSort("sortIndex").order(SortOrder.DESC));
    sortList.add(SortBuilders.fieldSort("id").order(SortOrder.DESC));
    String[] include = { "id", "infoName", "firstCheckItemList", "firstCategory", "sendSampleVOList",
            "periceVo", "infoIsOut", "tel", "isCMA", "isCNAS", "tsTitle"};
    String[] exclude = { };
    infoIdAgg.subAggregation(AggregationBuilders.topHits("max_id").sorts(sortList).fetchSource(include, exclude).size(1));

    // Order the buckets by max_sort_index descending and keep only the requested page.
    List<FieldSortBuilder> fieldSortList = new ArrayList<>(1);
    fieldSortList.add(SortBuilders.fieldSort("max_sort_index").order(SortOrder.DESC));
    infoIdAgg.subAggregation(new BucketSortPipelineAggregationBuilder("bucket_sort", fieldSortList).from(from).size(pageSize));

    builder.aggregation(infoIdAgg);
    searchRequest.source(builder);

    try {
        SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
        Aggregations aggregations = response.getAggregations();

        // Total number of groups and the derived page count (ceiling division).
        Cardinality idCount = aggregations.get("infoIdCount");
        int total = (int) idCount.getValue();
        pageResult.setTotal(total);
        pageResult.setSize(pageSize);
        pageResult.setPageNo(pageNo);
        int pages = (total + pageSize - 1) / pageSize;
        pageResult.setPages(pages);

        // One bucket per organisation, already sorted and paged by bucket_sort.
        Terms aggregation = aggregations.get("tInfoid");
        List<? extends Terms.Bucket> buckets = aggregation.getBuckets();
        for (Terms.Bucket bucket : buckets) {
            TopHits maxId = bucket.getAggregations().get("max_id");
            // Defensive: skip a bucket that unexpectedly carries no top hit instead of
            // failing the whole page with an ArrayIndexOutOfBoundsException.
            if (maxId.getHits().getHits().length == 0) {
                continue;
            }
            Map<String, Object> sourceAsMap = maxId.getHits().getHits()[0].getSourceAsMap();
            // Round-trip through JSON to map the _source fields onto the document class.
            String s = JSONObject.toJSONString(sourceAsMap);
            TestServiceIndexDocumentNew testServiceIndexDocumentNew = JSONObject.parseObject(s, TestServiceIndexDocumentNew.class);
            pageResult.getRows().add(testServiceIndexDocumentNew);
        }
    } catch (IOException e) {
        // Best effort: report the failure and fall through to return what we have.
        e.printStackTrace();
    }
    return pageResult;
}
原文链接:https://blog.csdn.net/weixin_45239670/article/details/122431901