java es聚合分组 es分组聚合查询

转载

数据侠客行 2023-10-27 09:17:23

文章标签 java es聚合分组 elasticsearch mget bulk 升序 文章分类 Java 后端开发

Elasticsearch分组聚合

一、分组聚合操作

开启fielddata属性

1.在Elasticsearch中，text字段的fielddata默认是false的，因为开启text字段的fielddata后对内存的占用很高

如果要对text字段进行聚合查询，就需要先开启该字段的 fielddata 属性，如下：

PUT /leafproduct/_mapping/product
{
  "properties": {
    "tags":{
      "type": "text",
      "fielddata":true
    }
  }
}

2.分组查询（每一个标签tags下有多少个商品）

（1）语法
 GET /leafproduct/product/_search
 {
   "size": 0, 
   "aggs": {
     "group_by_tags": {
       "terms": {
         "field": "tags"
       }
     }
   }
 }

（2）结果

{
  "took" : 29,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_tags" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "roushui",
          "doc_count" : 2
        },
        {
          "key" : "sihua",
          "doc_count" : 2
        },
        {
          "key" : "lishi",
          "doc_count" : 1
        },
        {
          "key" : "oulaiya",
          "doc_count" : 1
        }
      ]
    }
  }
}

3.查询加统计（对name包含"xifalu"的商品,每一个标签下有多少商品）

（1）语法：
GET /leafproduct/product/_search
{
  "size": 0, 
  "query": {
    "match": {
      "name": "xifalu"
    }
  },
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
      }
    }
  }
  
}

（2）结果

{
  "took" : 9,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_tags" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "roushui",
          "doc_count" : 2
        },
        {
          "key" : "sihua",
          "doc_count" : 2
        },
        {
          "key" : "lishi",
          "doc_count" : 1
        },
        {
          "key" : "oulaiya",
          "doc_count" : 1
        }
      ]
    }
  }
}

4.聚合分析（先按照tags分组，再计算每个分组下商品价格的平均值，最后按照平均价格升序排序）

（1）语法
GET /leafproduct/product/_search
{
  "size": 0, 
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
        , "order": {
          "avg_price": "asc"
        }
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
  
}

（2）结果

  {
    "took" : 8,
    "timed_out" : false,
    "_shards" : {
      "total" : 5,
      "successful" : 5,
      "skipped" : 0,
      "failed" : 0
    },
    "hits" : {
      "total" : 3,
      "max_score" : 0.0,
      "hits" : [ ]
    },
    "aggregations" : {
      "group_by_tags" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "roushui",
            "doc_count" : 2,
            "avg_price" : {
              "value" : 40.24499988555908
            }
          },
          {
            "key" : "oulaiya",
            "doc_count" : 1,
            "avg_price" : {
              "value" : 49.5
            }
          },
          {
            "key" : "sihua",
            "doc_count" : 2,
            "avg_price" : {
              "value" : 55.48999881744385
            }
          },
          {
            "key" : "lishi",
            "doc_count" : 1,
            "avg_price" : {
              "value" : 79.98999786376953
            }
          }
        ]
      }
    }
  }

5.聚合分析（先按照price区间分组，再按照tags分组，计算每个分组下商品价格的平均值，最后按照平均价格升序排序）

（1）语法
GET /leafproduct/product/_search
{
  "size": 0, 
  "aggs": {
    "group_by_price": {
      "range": {
        "field": "price",
        "ranges": [
          {
            "from": 0,
            "to": 30
          },
          {
            "from": 30,
            "to": 50
          },
          {
            "from": 50,
            "to": 100
          }
        ]
      },
      "aggs": {
        "group_by_tags": {
          "terms": {
            "field": "tags",
            "order": {
              "avg_price": "asc"
            }
          },
          "aggs": {
            "avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
  
}

（2）结果：

{
  "took" : 5,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_price" : {
      "buckets" : [
        {
          "key" : "0.0-30.0",
          "from" : 0.0,
          "to" : 30.0,
          "doc_count" : 0,
          "group_by_tags" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [ ]
          }
        },
        {
          "key" : "30.0-50.0",
          "from" : 30.0,
          "to" : 50.0,
          "doc_count" : 2,
          "group_by_tags" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "sihua",
                "doc_count" : 1,
                "avg_price" : {
                  "value" : 30.989999771118164
                }
              },
              {
                "key" : "roushui",
                "doc_count" : 2,
                "avg_price" : {
                  "value" : 40.24499988555908
                }
              },
              {
                "key" : "oulaiya",
                "doc_count" : 1,
                "avg_price" : {
                  "value" : 49.5
                }
              }
            ]
          }
        },
        {
          "key" : "50.0-100.0",
          "from" : 50.0,
          "to" : 100.0,
          "doc_count" : 1,
          "group_by_tags" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "lishi",
                "doc_count" : 1,
                "avg_price" : {
                  "value" : 79.98999786376953
                }
              },
              {
                "key" : "sihua",
                "doc_count" : 1,
                "avg_price" : {
                  "value" : 79.98999786376953
                }
              }
            ]
          }
        }
      ]
    }
  }
}

二、批量查询和批量更新

1.批量查询 mget多文档查询

(1).不指定某一索引和类型

可以在docs中为每个文档分别指定索引和类型，从而把不同索引、不同类型的数据放在一起批量查询
eg:
GET _mget
{
  "docs":[
     {
        "_index" : "leafproduct",
        "_type" : "product",
        "_id" : "1"
     },  
     {
        "_index" : "leafproduct",
        "_type" : "product",
        "_id" : "2"
     }
  ]
}

结果：

{
  "docs" : [
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "1",
      "_version" : 5,
      "found" : true,
      "_source" : {
        "name" : "haifeisi xifalu",
        "desc" : "rousishunhua",
        "price" : 30.99,
        "producer" : "haifeisi_PRODUCER",
        "tags" : [
          "roushui",
          "sihua"
        ]
      }
    },
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "2",
      "_version" : 1,
      "found" : true,
      "_source" : {
        "name" : "oulaiya xifalu",
        "desc" : "buxiu",
        "price" : 49.5,
        "producer" : "oulaiya",
        "tags" : [
          "roushui",
          "oulaiya"
        ]
      }
    }
  ]
}

(2).不指定类型

可以在URL中指定索引，不指定类型（类型在docs中给出），一起批量查询
eg: 
GET /leafproduct/_mget
{
  "docs":[
     {
        "_type" : "product",
        "_id" : "1"
     },  
     {
        "_type" : "product",
        "_id" : "2"
     }
  ]
}

结果：

{
  "docs" : [
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "1",
      "_version" : 5,
      "found" : true,
      "_source" : {
        "name" : "haifeisi xifalu",
        "desc" : "rousishunhua",
        "price" : 30.99,
        "producer" : "haifeisi_PRODUCER",
        "tags" : [
          "roushui",
          "sihua"
        ]
      }
    },
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "2",
      "_version" : 1,
      "found" : true,
      "_source" : {
        "name" : "oulaiya xifalu",
        "desc" : "buxiu",
        "price" : 49.5,
        "producer" : "oulaiya",
        "tags" : [
          "roushui",
          "oulaiya"
        ]
      }
    }
  ]
}

(3).固定索引，固定类型，批量根据ID查询，并且只查询某些字段

eg：批量查询id为1、2的document，且仅返回name、tags字段
GET /leafproduct/product/_mget
{
  "docs":[
     {
        "_id" : "1",
        "_source":["name","tags"]
     },  
     {
        "_id" : "2",
        "_source":["name","tags"]
     }
  ]
}

结果：
{
  "docs" : [
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "1",
      "_version" : 5,
      "found" : true,
      "_source" : {
        "name" : "haifeisi xifalu",
        "tags" : [
          "roushui",
          "sihua"
        ]
      }
    },
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "2",
      "_version" : 1,
      "found" : true,
      "_source" : {
        "name" : "oulaiya xifalu",
        "tags" : [
          "roushui",
          "oulaiya"
        ]
      }
    }
  ]
}



eg:批量根据id查询document
GET /leafproduct/product/_mget
{
  "ids":[ "1", "2","6"]
}

结果：注意id为6的文档不存在，其返回结果中found为false，且没有_source
{
  "docs" : [
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "1",
      "_version" : 5,
      "found" : true,
      "_source" : {
        "name" : "haifeisi xifalu",
        "desc" : "rousishunhua",
        "price" : 30.99,
        "producer" : "haifeisi_PRODUCER",
        "tags" : [
          "roushui",
          "sihua"
        ]
      }
    },
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "2",
      "_version" : 1,
      "found" : true,
      "_source" : {
        "name" : "oulaiya xifalu",
        "desc" : "buxiu",
        "price" : 49.5,
        "producer" : "oulaiya",
        "tags" : [
          "roushui",
          "oulaiya"
        ]
      }
    },
    {
      "_index" : "leafproduct",
      "_type" : "product",
      "_id" : "6",
      "found" : false
    }
  ]
}

2.批量更新 bulk多命令批量操作（批处理）

(1).一些解释

a.可以进行如下操作
    delete: 删除文档
    create: 创建一个文档（要创建的文档已存在时(id冲突)会报错；和其他操作一起执行时，不影响其他操作）
    update: 更新文档（partial update-部分更新）
    index: 类似PUT 操作，全量替换操作。
Tips：
    create 和index的区别 ： 如果数据存在，使用create操作失败，会提示文档已经存在，使用index则可以成功执行。
b.bulk批处理数据量
    bulk会把将要处理的数据载入内存中，所以数据量是有限制的，最佳的数据量不是一个确定的数值，它取决于你的硬件，你的文档大小以及复杂性，你的索引以及搜索的负载。
    一般建议每批1000-5000个文档，总大小在5-15MB之间

(2).实操

eg: 
批量操作：
删除一个已存在的document
创建一个新的document
创建一个已存在的document
更新一个存在的document
index不演示，自己可以尝试下

语法：
POST _bulk
{"delete":{"_index":"leafproduct","_type":"product","_id":3}}
{"create":{"_index":"leafproduct","_type":"product","_id":4}}
{"name":"测试id-4","desc":"测试创建","price":559.5,"producer":"leaf","tags":["测试","你好"]}
{"create":{"_index":"leafproduct","_type":"product","_id":4}}
{"name":"测试重复创建id-4","desc":"测试创建","price":559.5,"producer":"leaf","tags":["测试","你好"]}
{"update":{"_index":"leafproduct","_type":"product","_id":4}}
{"doc":{"tags":["测试","更新4的tags"]}}

结果：
{
  "took" : 385,
  "errors" : true,
  "items" : [
    {
      "delete" : {
        "_index" : "leafproduct",
        "_type" : "product",
        "_id" : "3",
        "_version" : 2,
        "result" : "deleted",
        "_shards" : {
          "total" : 2,
          "successful" : 1,
          "failed" : 0
        },
        "_seq_no" : 1,
        "_primary_term" : 2,
        "status" : 200
      }
    },
    {
      "create" : {
        "_index" : "leafproduct",
        "_type" : "product",
        "_id" : "4",
        "_version" : 1,
        "result" : "created",
        "_shards" : {
          "total" : 2,
          "successful" : 1,
          "failed" : 0
        },
        "_seq_no" : 3,
        "_primary_term" : 2,
        "status" : 201
      }
    },
    {
      "create" : {
        "_index" : "leafproduct",
        "_type" : "product",
        "_id" : "4",
        "status" : 409,
        "error" : {
          "type" : "version_conflict_engine_exception",
          "reason" : "[product][4]: version conflict, document already exists (current version [1])",
          "index_uuid" : "VrTaZV-GTKG01GxAtBUH8A",
          "shard" : "2",
          "index" : "leafproduct"
        }
      }
    },
    {
      "update" : {
        "_index" : "leafproduct",
        "_type" : "product",
        "_id" : "4",
        "_version" : 2,
        "result" : "updated",
        "_shards" : {
          "total" : 2,
          "successful" : 1,
          "failed" : 0
        },
        "_seq_no" : 4,
        "_primary_term" : 2,
        "status" : 200
      }
    }
  ]
}

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。