😊 @ 作者: 一恍过去
目录
- 1、pom引入
- 2、配置类
- 3、数据准备
- 4、 基本概念
- 5、 聚合为桶(分组查询)
- 6、聚合指标(聚合计算)
- 7、桶内指标(分组聚合计算)
1、pom引入
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.7</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>7.8.0</version>
</dependency>
<!-- elasticsearch的客户端 -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.8.0</version>
</dependency>
<!-- elasticsearch依赖2.x的log4j -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.8.2</version>
</dependency>
</dependencies>
2、配置类
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that registers the Elasticsearch {@link RestHighLevelClient} bean.
 *
 * <p>The client is built for a single node reached over HTTP at {@code 192.168.80.121:9200}.
 *
 * @Date: 2022/8/13 10:47
 **/
@Configuration
public class ElasticsearchConfig {

    /**
     * Creates the high-level REST client used to talk to Elasticsearch.
     *
     * @return a client configured with the single ES node address below
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        // Address of the ES node to connect to
        HttpHost esNode = new HttpHost("192.168.80.121", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}
3、数据准备
新增索引:
# 新增索引
PUT http://192.168.80.121:9200/cars
# 请求参数
{
"settings": {
"number_of_shards": 2,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"color": {
"type": "keyword"
},
"make": {
"type": "keyword"
},
"price": {
"type": "float"
},
"sold": {
"type": "keyword"
}
}
}
}
批量新增数据:
参考:《使用Http请求实现数据的批量导入》
# 批量导入数据
POST http://192.168.80.121:9200/cars/_bulk
# 注意:必须换行
{"index": {"_index": "cars", "_type": "_doc", "_id": 1}}
{ "price" : 10000, "color" : "red", "make" : "honda", "sold" : "2022-10-28" }
{"index": {"_index": "cars", "_type": "_doc", "_id": 2}}
{ "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2022-11-05" }
{"index": {"_index": "cars", "_type": "_doc", "_id": 3}}
{ "price" : 30000, "color" : "green", "make" : "ford", "sold" : "2022-05-18" }
{"index": {"_index": "cars", "_type": "_doc", "_id": 4}}
{ "price" : 15000, "color" : "blue", "make" : "toyota", "sold" : "2022-07-02" }
{"index": {"_index": "cars", "_type": "_doc", "_id": 5}}
{ "price" : 12000, "color" : "green", "make" : "toyota", "sold" : "2022-08-19" }
{"index": {"_index": "cars", "_type": "_doc", "_id": 6}}
{ "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2022-11-05" }
{"index": {"_index": "cars", "_type": "_doc", "_id": 7}}
{ "price" : 80000, "color" : "red", "make" : "bmw", "sold" : "2022-01-01" }
{"index": {"_index": "cars", "_type": "_doc", "_id": 8}}
{ "price" : 25000, "color" : "blue", "make" : "ford", "sold" : "2022-02-12" }
验证数据:
# 验证
GET http://192.168.80.121:9200/cars/_search
4、 基本概念
注意:在ES中,需要进行聚合、排序、过滤的字段处理方式比较特殊,不能被分词,因此应将字段类型设置为keyword。
基本数据格式如下
Elasticsearch中的聚合,包含多种类型,最常用的两种,一个叫桶,一个叫指标(度量):
桶(bucket)
桶的作用,是按照某种方式对数据进行分组(group by),每一组数据在ES中称为一个桶。
度量(metrics)
分组完成以后,我们一般会对组中的数据进行聚合运算,例如求平均值、最大、最小、求和等,这些在ES中称为度量。
比较常用的一些度量聚合方式:
- avg :求平均值
- max :求最大值
- min :求最小值
- percentiles :求百分位数
- stats :同时返回avg、max、min、sum、count等
- sum :求和
- Top hits :求前几
- Count:求总数
5、 聚合为桶(分组查询)
我们按照汽车的颜色 color 来划分桶
请求:
@Api(tags = "查询操作")
@RestController
@RequestMapping("/query")
@Slf4j
public class QueryController {

    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Bucket (group-by) query: groups the documents of the {@code cars} index by
     * {@code color} and prints every bucket's key and document count to stdout.
     *
     * @throws IOException propagated from the search call on the REST client
     */
    @ApiOperation(value = "聚合分组查询", notes = "聚合分组查询")
    @GetMapping("/group")
    public void group() throws IOException {
        // Terms aggregation named `colorGroup` on the `color` field.
        // BucketOrder.count(false) sorts buckets by document count descending (true would be ascending).
        AggregationBuilder byColor = AggregationBuilders.terms("colorGroup")
                .field("color")
                .order(BucketOrder.count(false));

        // size(0): only the aggregation result matters, so no hits are returned.
        SearchSourceBuilder source = new SearchSourceBuilder()
                .size(0)
                .aggregation(byColor);

        // Search the `cars` index.
        SearchRequest searchRequest = new SearchRequest()
                .indices("cars")
                .source(source);

        // Run the query.
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Read the buckets back by the aggregation name used above.
        ParsedStringTerms colorTerms = searchResponse.getAggregations().get("colorGroup");
        for (Terms.Bucket colorBucket : colorTerms.getBuckets()) {
            Object color = colorBucket.getKey();
            long docCount = colorBucket.getDocCount();
            System.out.println("color:" + color + "," + "count:" + docCount);
        }
    }
}
响应:
color:red,count:4
color:blue,count:2
color:green,count:2
6、聚合指标(聚合计算)
聚合指标是指直接对所有数据进行聚合,不进行分组查询;聚合方式为:avg、max、min、sum、stats、percentiles
请求:
@Api(tags = "查询操作")
@RestController
@RequestMapping("/query")
@Slf4j
public class QueryController {

    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Metric query: computes the average of {@code price} over all documents of the
     * {@code cars} index (no grouping) and prints it to stdout.
     *
     * @throws IOException propagated from the search call on the REST client
     */
    @ApiOperation(value = "聚合计算查询", notes = "聚合计算查询")
    @GetMapping("/aggs")
    public void aggs() throws IOException {
        // Avg metric named `avgPrice` on the `price` field.
        AggregationBuilder averagePrice = AggregationBuilders.avg("avgPrice").field("price");

        // size(0): only the aggregation result matters, so no hits are returned.
        SearchSourceBuilder source = new SearchSourceBuilder()
                .size(0)
                .aggregation(averagePrice);

        // Search the `cars` index.
        SearchRequest searchRequest = new SearchRequest()
                .indices("cars")
                .source(source);

        // Run the query.
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Read the metric back by the aggregation name used above.
        Avg priceAverage = searchResponse.getAggregations().get("avgPrice");
        double average = priceAverage.getValue();
        System.out.println("平均值为:" + average);
    }
}
响应:
平均值为:26500.0
7、桶内指标(分组聚合计算)
桶内指标是指对某个字段进行分组后再进行聚合计算;聚合方式为:avg、max、min、sum、stats、percentiles
请求:
@Api(tags = "查询操作")
@RestController
@RequestMapping("/query")
@Slf4j
public class QueryController {

    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Bucket-plus-metric query: groups the documents of the {@code cars} index by
     * {@code color}, computes the average {@code price} inside each bucket, and prints
     * key, document count and average per bucket to stdout.
     *
     * @throws IOException propagated from the search call on the REST client
     */
    @ApiOperation(value = "分组聚合计算查询", notes = "分组聚合计算查询")
    @GetMapping("/aggsGroup")
    public void aggsGroup() throws IOException {
        // Avg metric named `avgPrice` on `price`; evaluated per bucket as a sub-aggregation.
        AggregationBuilder averagePrice = AggregationBuilders.avg("avgPrice").field("price");

        // Terms aggregation named `colorGroup` on `color`.
        // BucketOrder.count(false) sorts buckets by document count descending (true would be ascending).
        AggregationBuilder byColor = AggregationBuilders.terms("colorGroup")
                .field("color")
                .order(BucketOrder.count(false))
                .subAggregation(averagePrice);

        // size(0): only the aggregation result matters, so no hits are returned.
        SearchSourceBuilder source = new SearchSourceBuilder()
                .size(0)
                .aggregation(byColor);

        // Search the `cars` index.
        SearchRequest searchRequest = new SearchRequest()
                .indices("cars")
                .source(source);

        // Run the query.
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Read the buckets back by the aggregation name used above.
        ParsedStringTerms colorTerms = searchResponse.getAggregations().get("colorGroup");
        for (Terms.Bucket colorBucket : colorTerms.getBuckets()) {
            // Each bucket carries its own sub-aggregation results.
            Avg bucketAverage = colorBucket.getAggregations().get("avgPrice");
            double average = bucketAverage.getValue();
            System.out.println("color:" + colorBucket.getKey() + "," + "count:" + colorBucket.getDocCount() + "," + "avg:" + average);
        }
    }
}
响应:
color:red,count:4,avg:32500.0
color:blue,count:2,avg:20000.0
color:green,count:2,avg:21000.0