Elasticsearch分组聚合
一、分组聚合操作
开启fielddata属性
1.在Elasticsearch中,text类型字段的fielddata默认是false,因为开启text的fielddata后对内存的占用很高
如果要对text字段进行聚合查询,就需要开启 fielddata 属性,如下:
PUT /leafproduct/_mapping/product
{
"properties": {
"tags":{
"type": "text",
"fielddata":true
}
}
}
2.分组查询(每一个标签tags下有多少个商品)
(1)语法
GET /leafproduct/product/_search
{
"size": 0,
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
}
}
}
}
(2)结果
{
"took" : 29,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_by_tags" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "roushui",
"doc_count" : 2
},
{
"key" : "sihua",
"doc_count" : 2
},
{
"key" : "lishi",
"doc_count" : 1
},
{
"key" : "oulaiya",
"doc_count" : 1
}
]
}
}
}
3.查询加统计(对name包含"xifalu"的商品,每一个标签下有多少商品)
(1)语法:
GET /leafproduct/product/_search
{
"size": 0,
"query": {
"match": {
"name": "xifalu"
}
},
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
}
}
}
}
(2)结果
{
"took" : 9,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_by_tags" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "roushui",
"doc_count" : 2
},
{
"key" : "sihua",
"doc_count" : 2
},
{
"key" : "lishi",
"doc_count" : 1
},
{
"key" : "oulaiya",
"doc_count" : 1
}
]
}
}
}
4.聚合分析(先按照tags分组,再计算每个分组下商品价格的平均值,最后按照平均价格升序排序)
(1)语法
GET /leafproduct/product/_search
{
"size": 0,
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
, "order": {
"avg_price": "asc"
}
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
(2)结果
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_by_tags" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "roushui",
"doc_count" : 2,
"avg_price" : {
"value" : 40.24499988555908
}
},
{
"key" : "oulaiya",
"doc_count" : 1,
"avg_price" : {
"value" : 49.5
}
},
{
"key" : "sihua",
"doc_count" : 2,
"avg_price" : {
"value" : 55.48999881744385
}
},
{
"key" : "lishi",
"doc_count" : 1,
"avg_price" : {
"value" : 79.98999786376953
}
}
]
}
}
}
5.聚合分析(先按照price区间分组,再按照tags分组,计算每个分组下商品价格的平均值,最后按照平均价格升序排序)
(1)语法
GET /leafproduct/product/_search
{
"size": 0,
"aggs": {
"group_by_price": {
"range": {
"field": "price",
"ranges": [
{
"from": 0,
"to": 30
},
{
"from": 30,
"to": 50
},
{
"from": 50,
"to": 100
}
]
},
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags",
"order": {
"avg_price": "asc"
}
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
(2)结果:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_by_price" : {
"buckets" : [
{
"key" : "0.0-30.0",
"from" : 0.0,
"to" : 30.0,
"doc_count" : 0,
"group_by_tags" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
},
{
"key" : "30.0-50.0",
"from" : 30.0,
"to" : 50.0,
"doc_count" : 2,
"group_by_tags" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "sihua",
"doc_count" : 1,
"avg_price" : {
"value" : 30.989999771118164
}
},
{
"key" : "roushui",
"doc_count" : 2,
"avg_price" : {
"value" : 40.24499988555908
}
},
{
"key" : "oulaiya",
"doc_count" : 1,
"avg_price" : {
"value" : 49.5
}
}
]
}
},
{
"key" : "50.0-100.0",
"from" : 50.0,
"to" : 100.0,
"doc_count" : 1,
"group_by_tags" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "lishi",
"doc_count" : 1,
"avg_price" : {
"value" : 79.98999786376953
}
},
{
"key" : "sihua",
"doc_count" : 1,
"avg_price" : {
"value" : 79.98999786376953
}
}
]
}
}
]
}
}
}
二、批量查询和批量更新
1.批量查询 mget多文档查询
(1).不指定某一索引和类型
可以对不同索引、不同类型下的数据一起批量查询(索引和类型在docs中逐条指定)
eg:
GET _mget
{
"docs":[
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "1"
},
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "2"
}
]
}
结果:
{
"docs" : [
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "1",
"_version" : 5,
"found" : true,
"_source" : {
"name" : "haifeisi xifalu",
"desc" : "rousishunhua",
"price" : 30.99,
"producer" : "haifeisi_PRODUCER",
"tags" : [
"roushui",
"sihua"
]
}
},
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "2",
"_version" : 1,
"found" : true,
"_source" : {
"name" : "oulaiya xifalu",
"desc" : "buxiu",
"price" : 49.5,
"producer" : "oulaiya",
"tags" : [
"roushui",
"oulaiya"
]
}
}
]
}
(2).不指定类型
在URL中指定索引、不指定类型(类型在docs中逐条指定),一起批量查询
eg:
GET /leafproduct/_mget
{
"docs":[
{
"_type" : "product",
"_id" : "1"
},
{
"_type" : "product",
"_id" : "2"
}
]
}
结果:
{
"docs" : [
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "1",
"_version" : 5,
"found" : true,
"_source" : {
"name" : "haifeisi xifalu",
"desc" : "rousishunhua",
"price" : 30.99,
"producer" : "haifeisi_PRODUCER",
"tags" : [
"roushui",
"sihua"
]
}
},
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "2",
"_version" : 1,
"found" : true,
"_source" : {
"name" : "oulaiya xifalu",
"desc" : "buxiu",
"price" : 49.5,
"producer" : "oulaiya",
"tags" : [
"roushui",
"oulaiya"
]
}
}
]
}
(3).固定索引,固定类型,批量根据ID查询,并且只查询某些字段
eg:批量查询id为1、2的document,且仅返回name、tags字段
GET /leafproduct/product/_mget
{
"docs":[
{
"_id" : "1",
"_source":["name","tags"]
},
{
"_id" : "2",
"_source":["name","tags"]
}
]
}
结果:
{
"docs" : [
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "1",
"_version" : 5,
"found" : true,
"_source" : {
"name" : "haifeisi xifalu",
"tags" : [
"roushui",
"sihua"
]
}
},
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "2",
"_version" : 1,
"found" : true,
"_source" : {
"name" : "oulaiya xifalu",
"tags" : [
"roushui",
"oulaiya"
]
}
}
]
}
eg:批量根据id查询document
GET /leafproduct/product/_mget
{
"ids":[ "1", "2","6"]
}
结果:注意id为6的文档未找到,其返回结果中found为false
{
"docs" : [
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "1",
"_version" : 5,
"found" : true,
"_source" : {
"name" : "haifeisi xifalu",
"desc" : "rousishunhua",
"price" : 30.99,
"producer" : "haifeisi_PRODUCER",
"tags" : [
"roushui",
"sihua"
]
}
},
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "2",
"_version" : 1,
"found" : true,
"_source" : {
"name" : "oulaiya xifalu",
"desc" : "buxiu",
"price" : 49.5,
"producer" : "oulaiya",
"tags" : [
"roushui",
"oulaiya"
]
}
},
{
"_index" : "leafproduct",
"_type" : "product",
"_id" : "6",
"found" : false
}
]
}
2.批量更新 bulk多命令批量操作(批处理)
(1).一些解释
a.可以进行如下操作
delete: 删除文档
create: 创建一个文档(文档已存在时(id冲突)会报错,和其他操作一起执行时不影响其他操作)
update: 更新文档(partial update-部分更新)
index: 类似PUT 操作,全量替换操作。
Tips:
create 和index的区别 : 如果数据存在,使用create操作失败,会提示文档已经存在,使用index则可以成功执行。
b.bulk批处理数据量
bulk会把将要处理的数据载入内存中,所以数据量是有限制的,最佳的数据量不是一个确定的数值,它取决于你的硬件,你的文档大小以及复杂性,你的索引以及搜索的负载。
一般建议每批1000-5000个文档,总大小约5-15MB
(2).实操
eg:
批量操作:
删除一个已存在的document
创建一个新的document
创建一个已存在的document
更新一个存在的document
index不演示,自己可以尝试下
语法:
POST _bulk
{"delete":{"_index":"leafproduct","_type":"product","_id":3}}
{"create":{"_index":"leafproduct","_type":"product","_id":4}}
{"name":"测试id-4","desc":"测试创建","price":559.5,"producer":"leaf","tags":["测试","你好"]}
{"create":{"_index":"leafproduct","_type":"product","_id":4}}
{"name":"测试重复创建id-4","desc":"测试创建","price":559.5,"producer":"leaf","tags":["测试","你好"]}
{"update":{"_index":"leafproduct","_type":"product","_id":4}}
{"doc":{"tags":["测试","更新4的tags"]}}
结果:
{
"took" : 385,
"errors" : true,
"items" : [
{
"delete" : {
"_index" : "leafproduct",
"_type" : "product",
"_id" : "3",
"_version" : 2,
"result" : "deleted",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 1,
"_primary_term" : 2,
"status" : 200
}
},
{
"create" : {
"_index" : "leafproduct",
"_type" : "product",
"_id" : "4",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 3,
"_primary_term" : 2,
"status" : 201
}
},
{
"create" : {
"_index" : "leafproduct",
"_type" : "product",
"_id" : "4",
"status" : 409,
"error" : {
"type" : "version_conflict_engine_exception",
"reason" : "[product][4]: version conflict, document already exists (current version [1])",
"index_uuid" : "VrTaZV-GTKG01GxAtBUH8A",
"shard" : "2",
"index" : "leafproduct"
}
}
},
{
"update" : {
"_index" : "leafproduct",
"_type" : "product",
"_id" : "4",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 4,
"_primary_term" : 2,
"status" : 200
}
}
]
}