Elasticsearch父子关系
5.x参考官网join介绍

注意:Elasticsearch的关联关系,必须要在同一个索引里面,而且父子关系必须在同一个分片中。
es6.x版本规定一个索引中只能有一个type,这就导致了,父子关系就必须放在同一个type中。

例子:本博客,主要两张表:
t_article(id,文章标题,摘要,分类)
t_article_contents(正文内容,t_article的id)。
需求:要求分类id为1的并且标题或摘要或正文内容,带有“山”的关键字的文章。
SELECT
    *
FROM
    t_article t
LEFT JOIN t_article_contents t1 ON t.id = t1.article_id
WHERE
    t.category_id = 1
AND (
    t.title LIKE '%山%'
    OR t.summary LIKE '%山%'
    OR t1.contents LIKE '%山%'
)

方案一:在业务层实现,
1.先t_article查询category_id ==1的文章
2.查询t_article_contents中contents有“山”,并article_id==步骤1的值
3.查询t_article查询category_id ==1,并且summary或title有“山”或id为步骤2的值 

方案二:关联查询
由于这里为了使用RestHighLevel(es5.6开始支持),使用了es6.3.2版本,
确发现es6.x以上一个index中只能有1个type。这就导致了t_article表和
t_article_contents表必须放在同一个index,同一个type。

方案三:嵌套json
将t_article_contents嵌套在t_article
{
  "title": "你大爷",
  "summary": "你大爷 哇哈哈 66666",
  "publish_time": "2018-08-29 15:56",
  "states": 1,
  "creator": "丁D",
  "click_num": 222,
  "thumb_url": "/upload/shan.jpg",
  "category_id": 1,
  "t_article_contents": {
    "contents": "娃哈哈哈哈哈哈哈哈"
  }
}

注意:由于t_article和t_article_contents只能在同一个type中。由于id就必须唯一.所以mysql中的id不能直接同在es中。

方案二

创建idnex,设置mapping,定义父子关系t_article为父,t_article_contents为子
127.0.0.1:9200/blog/
{
  "mappings": {
    "t_article": {
      "properties": {
        "publish_time":{
        "type":"text",
        "fielddata":true
        },
        "join_field": { 
          "type": "join",
          "relations": {
            "t_article": "t_article_contents" 
          }
        }
      }
    }
  }
}

插入两条父数据
127.0.0.1:9200/blog/t_article/1
{
"title":"山有木兮",
"summary":"《山有木兮》是橙光游戏《人鱼传说之长生烛》的主题曲,执素兮谱曲,顾聆落填词演唱者伦桑,于2016年8月12日正式发行",
"publish_time":"2018-08-29 15:56",
"category_id":1,
"join_field": {
    "name": "t_article" 
  }
}


127.0.0.1:9200/blog/t_article/2
{
"title":"你大爷",
"summary":"你大爷 哇哈哈 66666",
"publish_time":"2018-08-29 15:56",
"category_id":1,
"join_field": {
    "name": "t_article" 
  }
}




//中文分词器使用ik
127.0.0.1:9200/blog/
{
  "mappings": {
    "t_article": {

        "_all": {
            "analyzer": "ik_max_word",
            "search_analyzer": "ik_max_word",
            "term_vector": "no",
            "store": "false"
        },

        "properties": {
            "publish_time":{
            "type":"text",
            "fielddata":true
            },
            "join_field": { 
              "type": "join",
              "relations": {
                "t_article": "t_article_contents" 
              }
            },

            "title": {
                "type": "text",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_max_word",
                "boost": 8
            }

            ,

            "summary": {
                "type": "text",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_max_word",
                "boost": 8
            }
            ,

            "contents": {
                "type": "text",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_max_word",
                "boost": 8
            }

      }
    }
  }
}

创建数据时使用了routing=1参数,所以后续getByid,update,delete都需要指定routing参数,否则ES按照默认路由规则去找shard,自然找不到了。

为id==1的文章插入两条子数据

127.0.0.1:9200/blog/t_article/3?routing=1
{
"contents":"娃哈哈哈哈哈哈哈哈",
"join_field": {
    "name": "t_article_contents",
    "parent": 1
  }
}

127.0.0.1:9200/blog/t_article/4?routing=1
{
"contents":"  沿着路灯一个人走回家 和老朋友打电话 你那里天气好吗",
"join_field": {
    "name": "t_article_contents",
    "parent": 1
  }
}

使用 RestHighLevel

<!--elasticsearch  start-->
        <!-- https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>6.3.2</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>6.3.2</version>
        </dependency>
        <!--elasticsearch  end-->

    /**
     * 构建查询信息
     * @return
     */
    public void buildSearchPage(ModelAndView mv,Long categoryId,String searchKey) throws IOException {
        RestClientBuilder restClient = RestClient.builder(
                new HttpHost("localhost", 9200, "http"));
        RestHighLevelClient client = new RestHighLevelClient(restClient);

        // 创建并设置SearchSourceBuilder对象
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        BoolQueryBuilder  boolQueryBuilder = new BoolQueryBuilder();
        BoolQueryBuilder  filterQueryBuilder = new BoolQueryBuilder();
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("category_id",categoryId);
        filterQueryBuilder.must(termQueryBuilder);
        filterQueryBuilder.should(QueryBuilders.matchQuery("title",searchKey));
        filterQueryBuilder.should(QueryBuilders.matchQuery("summary",searchKey));
        HasChildQueryBuilder hasChildQueryBuilder = new HasChildQueryBuilder(
                "t_article_contents",QueryBuilders.matchQuery("contents",searchKey), ScoreMode.None);
        filterQueryBuilder.should(hasChildQueryBuilder);

        boolQueryBuilder.filter(filterQueryBuilder);
        searchSourceBuilder.query(boolQueryBuilder);
        searchSourceBuilder.from(0);
        // 每页多少条数据
        searchSourceBuilder.size(1000);
        // 设置排序规则
        searchSourceBuilder.sort("publish_time", SortOrder.DESC);
        // 设置超时时间为2s
        searchSourceBuilder.timeout(new TimeValue(2000));

        // 创建并设置SearchRequest对象
        SearchRequest searchRequest = new SearchRequest();
        // 设置request要搜索的索引和类型
        searchRequest.indices("blog").types("t_article");
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = client.search(searchRequest);
        System.out.printf(searchResponse.toString());
        client.close();
    }

转成DSL
{
  "from": 0,
  "size": 1000,
  "timeout": "2000ms",
  "query": {
    "bool": {
      "filter": [
        {
          "bool": {
            "must": [
              {
                "term": {
                  "category_id": {
                    "value": 1,
                    "boost": 1.0
                  }
                }
              }
            ],
            "should": [
              {
                "match": {
                  "title": {
                    "query": "山",
                    "operator": "OR",
                    "prefix_length": 0,
                    "max_expansions": 50,
                    "fuzzy_transpositions": true,
                    "lenient": false,
                    "zero_terms_query": "NONE",
                    "auto_generate_synonyms_phrase_query": true,
                    "boost": 1.0
                  }
                }
              },
              {
                "match": {
                  "summary": {
                    "query": "山",
                    "operator": "OR",
                    "prefix_length": 0,
                    "max_expansions": 50,
                    "fuzzy_transpositions": true,
                    "lenient": false,
                    "zero_terms_query": "NONE",
                    "auto_generate_synonyms_phrase_query": true,
                    "boost": 1.0
                  }
                }
              },
              {
                "has_child": {
                  "query": {
                    "match": {
                      "contents": {
                        "query": "山",
                        "operator": "OR",
                        "prefix_length": 0,
                        "max_expansions": 50,
                        "fuzzy_transpositions": true,
                        "lenient": false,
                        "zero_terms_query": "NONE",
                        "auto_generate_synonyms_phrase_query": true,
                        "boost": 1.0
                      }
                    }
                  },
                  "type": "t_article_contents",
                  "score_mode": "none",
                  "min_children": 0,
                  "max_children": 2147483647,
                  "ignore_unmapped": false,
                  "boost": 1.0
                }
              }
            ],
            "adjust_pure_negative": true,
            "boost": 1.0
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1.0
    }
  },
  "sort": [
    {
      "publish_time": {
        "order": "desc"
      }
    }
  ]
}


可以使用inner_hits在查询父数据同时带出子数据(或子带出父)。
inner_hits默认只能查询3条数据,需要查询更多要自己设置from和size

              {
                "has_child": {
                  "query": {
                    "match": {
                      "contents": {
                        "query": "山",
                        "operator": "OR",
                        "prefix_length": 0,
                        "max_expansions": 50,
                        "fuzzy_transpositions": true,
                        "lenient": false,
                        "zero_terms_query": "NONE",
                        "auto_generate_synonyms_phrase_query": true,
                        "boost": 1.0
                      }
                    }
                  },
                  "inner_hits":{},
                  "type": "t_article_contents",
                  "score_mode": "none",
                  "min_children": 0,
                  "max_children": 2147483647,
                  "ignore_unmapped": false,
                  "boost": 1.0
                }
              }

logstash导入es关联关系

input { 

    jdbc {
        jdbc_driver_library => "/usr/local/logstash-6.3.2/lib/mysql-connector-java-5.1.29.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/blog"
        jdbc_user => "root"
        jdbc_password => "root#$%123"
        # statement_filepath => "filename.sql"
        statement => "SELECT * from t_article where test_time> :sql_last_value order by test_time asc"
        jdbc_paging_enabled => "true"
        jdbc_page_size => "10"
        jdbc_default_timezone =>"Asia/Shanghai"
        type => "t_article"

        tracking_column_type => "timestamp" 
        use_column_value => true
        tracking_column => "test_time"
        record_last_run  => true
        last_run_metadata_path => "/usr/local/logstash-6.3.2/config/last_run_metadata.txt"
        schedule => "* * * * *"
    }

    jdbc {
        jdbc_driver_library => "/usr/local/logstash-6.3.2/lib/mysql-connector-java-5.1.29.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/blog"
        jdbc_user => "root"
        jdbc_password => "root#$%123"
        # statement_filepath => "filename.sql"
        statement => "SELECT * from t_article_contents where test_time> :sql_last_value order by test_time asc"
        jdbc_paging_enabled => "true"
        jdbc_page_size => "10"
        jdbc_default_timezone =>"Asia/Shanghai"
        type => "t_article_contents"

        tracking_column_type => "timestamp" 
        use_column_value => true
        tracking_column => "test_time"
        record_last_run  => true
        last_run_metadata_path => "/usr/local/logstash-6.3.2/config/last_run_content_metadata.txt"
        schedule => "* * * * *"
    }
}

filter {

    if [type] == "t_article" {

        ruby{
            code => '
                event.set("test333333333333",111);

                field = {
                    "todo_id" => event.get("test_time")
                }

                join_field = {
                    "name": "t_article"
                }

                event.set("join_field",join_field)
            '
        }

        json {
            source => "message"
            remove_field => ["message"]
        }

        json {
            source => "{\"test\" : \"test1\"}"
            target =>"aaaaaa"
        }

        mutate {
            add_field => { 
                "join_field11" => "%{aaaaaa}" 
                "join_field22" => "%{test_time}"


            }
        }
    }

    if [type] == "t_article_contents" {

        ruby {

            code =>'
                join_field = {
                    "name" => "t_article_contents","parent" => "%{article_id}"
                } 

                event.set("join_field",join_field)
            '

        }    

        json {
            source => "message"
            remove_field => ["message"]
        }
    }
}


output {

    if [type] == "t_article" {

        elasticsearch { 

            hosts => ["39.108.231.144:9200"]
            document_type  => "t_article"
            index => "blog"
            document_id => "%{id}"
        }
    }

    if [type] == "t_article_contents" {

        elasticsearch { 
            hosts => ["39.108.231.144:9200"]
            document_type  => "t_article"
            index => "blog"
            document_id => "%{id}"
            routing => "%{article_id}"
        }

    }
}

删除查询的数据

post http://www.582466.top:9200/blog/_delete_by_query
{
  "query": {
    "match_all": {}
  }
}

方案三:嵌套json

不管是修改t_article还是修改t_article_contents都要全部取出来修改。

从mysql导入es的时候要在两张表的数据,整成嵌套json,可以使用插件logstash-filter-aggregate