😊 @ 作者: 一恍过去



目录

  • 1、pom引入
  • 2、配置类
  • 3、数据准备
  • 4、 基本概念
  • 5、 聚合为桶(分组查询)
  • 6、聚合指标(聚合计算)
  • 7、桶内指标(分组聚合计算)


1、pom引入

<dependencies>
    <!-- Spring MVC / embedded web server for the REST controllers -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Lombok annotations (e.g. @Slf4j); optional so it is not passed on transitively -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>

    <!-- Spring Boot test support, test scope only -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>

    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.7</version>
    </dependency>

    <!-- Elasticsearch core; keep the version identical to the client below -->
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>7.8.0</version>
    </dependency>

    <!-- Elasticsearch high-level REST client -->
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>elasticsearch-rest-high-level-client</artifactId>
        <version>7.8.0</version>
    </dependency>

    <!-- Elasticsearch depends on log4j 2.x -->
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
        <version>2.8.2</version>
    </dependency>
</dependencies>

2、配置类

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 *
 * @Author:
 * @Date: 2022/8/13 10:47
 **/
@Configuration
public class ElasticsearchConfig {

    /**
     * Creates the client used to talk to Elasticsearch.
     *
     * @return a {@link RestHighLevelClient} connected to 192.168.80.121:9200 over HTTP
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        // Address of the ES node to connect to
        HttpHost esNode = new HttpHost("192.168.80.121", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}

3、数据准备

新增索引:

# 新增索引
PUT http://192.168.80.121:9200/cars

# 请求参数
{
  "settings": {
    "number_of_shards": 2,
    "number_of_replicas": 1
  },
  "mappings": {
      "properties": {
        "color": {
          "type": "keyword"
        },
        "make": {
          "type": "keyword"
        },
        "price": {
          "type": "float"
        },
          "sold": {
          "type": "keyword"
        }
      }
    }
}

批量新增数据:

参考:《使用Http请求实现数据的批量导入》

# 批量导入数据
POST http://192.168.80.121:9200/cars/_bulk

# 注意:每条JSON必须独占一行,且请求体的最后一行也必须以换行符结尾

{"index": {"_index": "cars", "_type": "_doc", "_id": 1}}
{ "price" : 10000, "color" : "red", "make" : "honda", "sold" : "2022-10-28" }

{"index": {"_index": "cars", "_type": "_doc", "_id": 2}}
{ "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2022-11-05" }

{"index": {"_index": "cars", "_type": "_doc", "_id": 3}}
{ "price" : 30000, "color" : "green", "make" : "ford", "sold" : "2022-05-18" }

{"index": {"_index": "cars", "_type": "_doc", "_id": 4}}
{ "price" : 15000, "color" : "blue", "make" : "toyota", "sold" : "2022-07-02" }

{"index": {"_index": "cars", "_type": "_doc", "_id": 5}}
{ "price" : 12000, "color" : "green", "make" : "toyota", "sold" : "2022-08-19" }

{"index": {"_index": "cars", "_type": "_doc", "_id": 6}}
{ "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2022-11-05" }

{"index": {"_index": "cars", "_type": "_doc", "_id": 7}}
{ "price" : 80000, "color" : "red", "make" : "bmw", "sold" : "2022-01-01" }

{"index": {"_index": "cars", "_type": "_doc", "_id": 8}}
{ "price" : 25000, "color" : "blue", "make" : "ford", "sold" : "2022-02-12" }

验证数据:

# 验证
GET http://192.168.80.121:9200/cars/_search

4、 基本概念

注意:在ES中,需要进行聚合、排序、过滤的字段处理方式比较特殊,不能被分词,因此这类文本字段的类型应设置为keyword。

基本数据格式如下

Elasticsearch中的聚合,包含多种类型,最常用的两种,一个叫桶(bucket),一个叫指标(度量,metrics):

桶(bucket)

桶的作用,是按照某种方式对数据进行分组(group by),每一组数据在ES中称为一个桶

度量(metrics)

分组完成以后,我们一般会对组中的数据进行聚合运算,例如求平均值、最大、最小、求和等,这些在ES中称为度量

比较常用的一些度量聚合方式:

  • avg :求平均值
  • max :求最大值
  • min :求最小值
  • percentiles :求百分位数
  • stats :同时返回avg、max、min、sum、count等
  • sum :求和
  • top_hits :求前几(返回排在最前面的几条文档)
  • value_count :求总数(统计字段值的个数)

5、 聚合为桶(分组查询)

我们按照汽车的颜色(color)来划分桶

请求:

/**
 * REST controller demonstrating an Elasticsearch bucket (terms) aggregation.
 */
@Api(tags = "查询操作")
@RestController
@RequestMapping("/query")
@Slf4j
public class QueryController {
    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Groups the documents of the {@code cars} index by {@code color} and prints
     * every color together with its document count to standard output.
     *
     * @throws IOException if the search request to Elasticsearch fails
     */
    @ApiOperation(value = "聚合分组查询", notes = "聚合分组查询")
    @GetMapping("/group")
    public void group() throws IOException {
        // Search the "cars" index
        SearchRequest request = new SearchRequest();
        request.indices("cars");

        SearchSourceBuilder builder = new SearchSourceBuilder();
        // Only the buckets matter here, so return no hits
        builder.size(0);

        // Terms aggregation named "colorGroup" on the color field;
        // BucketOrder.count(false) sorts buckets by document count descending (true = ascending)
        AggregationBuilder colorTerms = AggregationBuilders.terms("colorGroup")
                .field("color")
                .order(BucketOrder.count(false));
        builder.aggregation(colorTerms);

        // Execute the search
        request.source(builder);
        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

        // Read the buckets through the Terms interface rather than a concrete parsed
        // implementation, so the code does not depend on the key type of the result
        Aggregations aggregations = response.getAggregations();
        Terms colorGroup = aggregations.get("colorGroup");
        List<? extends Terms.Bucket> buckets = colorGroup.getBuckets();
        for (Terms.Bucket bucket : buckets) {
            System.out.println("color:" + bucket.getKey() + "," + "count:" + bucket.getDocCount());
        }
    }
}

响应:

color:red,count:4
color:blue,count:2
color:green,count:2

6、聚合指标(聚合计算)

聚合指标是指直接对所有数据进行聚合,不进行分组查询;聚合方式为:avg、max、min、sum 、stats 、percentiles

请求:

/**
 * REST controller demonstrating an Elasticsearch metric aggregation without grouping.
 */
@Api(tags = "查询操作")
@RestController
@RequestMapping("/query")
@Slf4j
public class QueryController {
    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Computes the average of the {@code price} field over the {@code cars} index
     * and prints it to standard output.
     *
     * @throws IOException if the search request to Elasticsearch fails
     */
    @ApiOperation(value = "聚合计算查询", notes = "聚合计算查询")
    @GetMapping("/aggs")
    public void aggs() throws IOException {
        // size(0): only the aggregation result is needed, so return no hits;
        // the single metric "avgPrice" averages the price field
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
                .size(0)
                .aggregation(AggregationBuilders.avg("avgPrice").field("price"));

        // Target the "cars" index and run the search
        SearchRequest searchRequest = new SearchRequest();
        searchRequest.indices("cars");
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Look the metric up by the name it was registered under
        Avg averagePrice = searchResponse.getAggregations().get("avgPrice");
        System.out.println("平均值为:" + averagePrice.getValue());
    }

}

响应:

平均值为:26500.0

7、桶内指标(分组聚合计算)

桶内指标是指对某个字段进行分组后,再对每个分组(桶)内的数据进行聚合计算;聚合方式为:avg、max、min、sum 、stats 、percentiles

请求:

/**
 * REST controller demonstrating a metric aggregation nested inside a bucket aggregation.
 */
@Api(tags = "查询操作")
@RestController
@RequestMapping("/query")
@Slf4j
public class QueryController {
    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Groups the documents of the {@code cars} index by {@code color}, computes the
     * average {@code price} inside every group, and prints color, document count and
     * average to standard output.
     *
     * @throws IOException if the search request to Elasticsearch fails
     */
    @ApiOperation(value = "分组聚合计算查询", notes = "分组聚合计算查询")
    @GetMapping("/aggsGroup")
    public void aggsGroup() throws IOException {
        // Search the "cars" index
        SearchRequest request = new SearchRequest();
        request.indices("cars");

        SearchSourceBuilder builder = new SearchSourceBuilder();
        // Only the aggregation result matters here, so return no hits
        builder.size(0);

        // Per-bucket metric: average of the price field
        AggregationBuilder avgBuilder = AggregationBuilders.avg("avgPrice").field("price");

        // Terms aggregation named "colorGroup" on the color field;
        // BucketOrder.count(false) sorts buckets by document count descending (true = ascending).
        // The sub-aggregation is attached before the parent is registered on the source
        // builder, so the request is fully assembled at the point where it is added.
        AggregationBuilder aggregationBuilder = AggregationBuilders.terms("colorGroup")
                .field("color")
                .order(BucketOrder.count(false))
                .subAggregation(avgBuilder);
        builder.aggregation(aggregationBuilder);

        // Execute the search
        request.source(builder);
        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

        // Read the buckets through the Terms interface rather than a concrete parsed
        // implementation, so the code does not depend on the key type of the result
        Aggregations aggregations = response.getAggregations();
        Terms colorGroup = aggregations.get("colorGroup");
        List<? extends Terms.Bucket> buckets = colorGroup.getBuckets();
        for (Terms.Bucket bucket : buckets) {
            // Each bucket carries the results of its own sub-aggregations
            Aggregations bucketAggregations = bucket.getAggregations();
            Avg avgPrice = bucketAggregations.get("avgPrice");
            double value = avgPrice.getValue();
            System.out.println("color:" + bucket.getKey() + "," + "count:" + bucket.getDocCount() + "," + "avg:" + value);
        }
    }

}

响应:

color:red,count:4,avg:32500.0
color:blue,count:2,avg:20000.0
color:green,count:2,avg:21000.0