1.terms
该分桶策略最简单,直接按照term来分桶。如果字段是text类型,默认不能直接聚合(需要开启fielddata,开启后按照分词后的term分桶);若要按照完整的原始值分桶,field的值需要设置为field_name.keyword。如对salary字段进行分桶。
POST /employee/_search
{
"size": 0,
"aggs": {
"salary_terms": {
"terms": {
"field": "salary",
"size": 10
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"aggs_name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 20000.0,
"doc_count" : 2
},
{
"key" : 15000.0,
"doc_count" : 1
},
{
"key" : 18000.0,
"doc_count" : 1
},
{
"key" : 28000.0,
"doc_count" : 1
},
{
"key" : 29000.0,
"doc_count" : 1
},
{
"key" : 30000.0,
"doc_count" : 1
},
{
"key" : 50000.0,
"doc_count" : 1
}
]
}
}
}
2.range
通过指定数值的范围来设定分桶规则,如对salary字段按照小于18000、大于等于18000并且小于30000、大于等于30000三种情况分桶。
POST /employee/_search
{
"size": 0,
"aggs": {
"salary_range": {
"range": {
"field": "salary",
"ranges": [{
"key": "小于18000",
"to": 18000
},
{
"key": "大于等于18000并且小于30000",
"from": 20000,
"to": 30000
},
{
"key": "大于等于30000",
"from": 30000
}
]
}
}
}
}
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"salary_range" : {
"buckets" : [
{
"key" : "小于18000",
"to" : 18000.0,
"doc_count" : 1
},
{
"key" : "大于等于18000并且小于30000",
"from" : 20000.0,
"to" : 30000.0,
"doc_count" : 4
},
{
"key" : "大于等于30000",
"from" : 30000.0,
"doc_count" : 2
}
]
}
}
}
3.date_range
通过指定日期的范围来设定分桶规则,如对birthday字段按照"1980-1990"以及"1990-2020"两个时间段来分桶(每个范围包含from,不包含to)。
POST /employee/_search
{
"size": 0,
"aggs": {
"birthday_date_range": {
"date_range": {
"field": "birthday",
"format": "yyyy",
"ranges": [{
"from": "1980",
"to": "1990"
},
{
"from": "1990",
"to": "2020"
}
]
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 7,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"birthday_date_range" : {
"buckets" : [
{
"key" : "1980-1990",
"from" : 3.155328E11,
"from_as_string" : "1980",
"to" : 6.31152E11,
"to_as_string" : "1990",
"doc_count" : 2
},
{
"key" : "1990-2020",
"from" : 6.31152E11,
"from_as_string" : "1990",
"to" : 1.5778368E12,
"to_as_string" : "2020",
"doc_count" : 5
}
]
}
}
}
from和to在指定日期时,可以使用date math表达式(如now-10y/y)。
4.histogram
直方图,以固定间隔的策略来分割数据,如对salary字段按照5000的间隔进行分桶。
POST /employee/_search
{
"size": 0,
"aggs": {
"salary_histogram": {
"histogram": {
"field": "salary",
"interval": 5000,
"min_doc_count": 1,
"extended_bounds": {
"min": 10000,
"max": 50000
}
}
}
}
}
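- interval:分桶的间隔数值,每个桶的key为该桶区间的起始值
- min_doc_count:桶内最小文档个数,文档数小于该值的桶不会出现在结果中
- extended_bounds:强制将直方图的桶范围扩展到指定的min和max(即使该范围内没有文档);仅在min_doc_count为0时才有效果,本例中min_doc_count为1,因此空桶不会返回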
{
"took" : 14,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"salary_histogram" : {
"buckets" : [
{
"key" : 15000.0,
"doc_count" : 2
},
{
"key" : 20000.0,
"doc_count" : 2
},
{
"key" : 25000.0,
"doc_count" : 2
},
{
"key" : 30000.0,
"doc_count" : 1
},
{
"key" : 50000.0,
"doc_count" : 1
}
]
}
}
}
5.date_histogram
针对日期的直方图或者柱状图,是时序数据分析中常用的聚合分析类型,如对birthday字段按照3660天(约10年)的固定间隔进行分桶。
POST /employee/_search
{
"size": 0,
"aggs": {
"birthday_date_histogram": {
"date_histogram": {
"field": "birthday",
"fixed_interval": "3660d",
"min_doc_count": 1,
"format": "yyyy",
"extended_bounds": {
"min": 1980,
"max": 2020
}
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 7,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"birthday_date_histogram" : {
"buckets" : [
{
"key_as_string" : "1980",
"key" : 316224000000,
"doc_count" : 2
},
{
"key_as_string" : "1990",
"key" : 632448000000,
"doc_count" : 5
}
]
}
}
}
6.Bucket + Metric
(1).简介
bucket聚合分析允许通过添加子分析来进一步分析,该子分析可以是metric也可以是bucket。
(2).分桶后再分桶
如先对职位进行分桶,然后再对年龄进行分桶。
POST /employee/_search
{
"size": 0,
"aggs": {
"job_terms": {
"terms": {
"field": "job",
"size": 10
},
"aggs": {
"age_range": {
"range": {
"field": "age",
"ranges": [{
"to": 25
},
{
"from": 25,
"to": 35
},
{
"from": 35
}
]
}
}
}
}
}
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 7,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"job_terms" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java engineer",
"doc_count" : 4,
"age_range" : {
"buckets" : [
{
"key" : "*-25.0",
"to" : 25.0,
"doc_count" : 1
},
{
"key" : "25.0-35.0",
"from" : 25.0,
"to" : 35.0,
"doc_count" : 3
},
{
"key" : "35.0-*",
"from" : 35.0,
"doc_count" : 0
}
]
}
},
{
"key" : "Vue engineer",
"doc_count" : 2,
"age_range" : {
"buckets" : [
{
"key" : "*-25.0",
"to" : 25.0,
"doc_count" : 0
},
{
"key" : "25.0-35.0",
"from" : 25.0,
"to" : 35.0,
"doc_count" : 2
},
{
"key" : "35.0-*",
"from" : 35.0,
"doc_count" : 0
}
]
}
},
{
"key" : "Technical director",
"doc_count" : 1,
"age_range" : {
"buckets" : [
{
"key" : "*-25.0",
"to" : 25.0,
"doc_count" : 0
},
{
"key" : "25.0-35.0",
"from" : 25.0,
"to" : 35.0,
"doc_count" : 0
},
{
"key" : "35.0-*",
"from" : 35.0,
"doc_count" : 1
}
]
}
}
]
}
}
}
(3).分桶后进行数据分析
先对职位进行分桶,然后再对薪水进行分析。
POST /employee/_search
{
"size": 0,
"aggs": {
"job_terms": {
"terms": {
"field": "job",
"size": 10
},
"aggs": {
"salary_stats": {
"stats": {
"field": "salary"
}
}
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 7,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"job_terms" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java engineer",
"doc_count" : 4,
"salary_stats" : {
"count" : 4,
"min" : 15000.0,
"max" : 30000.0,
"avg" : 23500.0,
"sum" : 94000.0
}
},
{
"key" : "Vue engineer",
"doc_count" : 2,
"salary_stats" : {
"count" : 2,
"min" : 18000.0,
"max" : 28000.0,
"avg" : 23000.0,
"sum" : 46000.0
}
},
{
"key" : "Technical director",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 50000.0,
"max" : 50000.0,
"avg" : 50000.0,
"sum" : 50000.0
}
}
]
}
}
}