Elasticsearch父子关系
5.x参考官网join介绍
注意:Elasticsearch的关联关系,必须要在同一个索引里面,而且父子关系必须在同一个分片中。
es6.x版本规定一个索引中只能有一个type,这就导致了,父子关系就必须放在同一个type中。
例子:本博客,主要两张表:
t_article(id,文章标题,摘要,分类)
t_article_contents(正文内容,t_article的id)。
需求:要求分类id为1的并且标题或摘要或正文内容,带有“山”的关键字的文章。
SELECT
*
FROM
t_article t
LEFT JOIN t_article_contents t1 ON t.id = t1.article_id
WHERE
t.category_id = 1
AND (
t.title LIKE '%山%'
OR t.summary LIKE '%山%'
OR t1.contents LIKE '%山%'
)
方案一:在业务层实现,
1.先t_article查询category_id ==1的文章
2.查询t_article_contents中contents有“山”,并article_id==步骤1的值
3.查询t_article查询category_id ==1,并且summary或title有“山”或id为步骤2的值
方案二:关联查询
由于这里为了使用RestHighLevel(es5.6开始支持),使用了es6.3.2版本,
确发现es6.x以上一个index中只能有1个type。这就导致了t_article表和
t_article_contents表必须放在同一个index,同一个type。
方案三:嵌套json
将t_article_contents嵌套在t_article
{
"title": "你大爷",
"summary": "你大爷 哇哈哈 66666",
"publish_time": "2018-08-29 15:56",
"states": 1,
"creator": "丁D",
"click_num": 222,
"thumb_url": "/upload/shan.jpg",
"category_id": 1,
"t_article_contents": {
"contents": "娃哈哈哈哈哈哈哈哈"
}
}
注意:由于t_article和t_article_contents只能在同一个type中。由于id就必须唯一.所以mysql中的id不能直接同在es中。
方案二
创建idnex,设置mapping,定义父子关系t_article为父,t_article_contents为子
127.0.0.1:9200/blog/
{
"mappings": {
"t_article": {
"properties": {
"publish_time":{
"type":"text",
"fielddata":true
},
"join_field": {
"type": "join",
"relations": {
"t_article": "t_article_contents"
}
}
}
}
}
}
插入两条父数据
127.0.0.1:9200/blog/t_article/1
{
"title":"山有木兮",
"summary":"《山有木兮》是橙光游戏《人鱼传说之长生烛》的主题曲,执素兮谱曲,顾聆落填词演唱者伦桑,于2016年8月12日正式发行",
"publish_time":"2018-08-29 15:56",
"category_id":1,
"join_field": {
"name": "t_article"
}
}
127.0.0.1:9200/blog/t_article/2
{
"title":"你大爷",
"summary":"你大爷 哇哈哈 66666",
"publish_time":"2018-08-29 15:56",
"category_id":1,
"join_field": {
"name": "t_article"
}
}
//中文分词器使用ik
127.0.0.1:9200/blog/
{
"mappings": {
"t_article": {
"_all": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"term_vector": "no",
"store": "false"
},
"properties": {
"publish_time":{
"type":"text",
"fielddata":true
},
"join_field": {
"type": "join",
"relations": {
"t_article": "t_article_contents"
}
},
"title": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"boost": 8
}
,
"summary": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"boost": 8
}
,
"contents": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"boost": 8
}
}
}
}
}
创建数据时使用了routing=1参数,所以后续getByid,update,delete都需要指定routing参数,否则ES按照默认路由规则去找shard,自然找不到了。
为id==1的文章插入两条子数据
127.0.0.1:9200/blog/t_article/3?routing=1
{
"contents":"娃哈哈哈哈哈哈哈哈",
"join_field": {
"name": "t_article_contents",
"parent": 1
}
}
127.0.0.1:9200/blog/t_article/4?routing=1
{
"contents":" 沿着路灯一个人走回家 和老朋友打电话 你那里天气好吗",
"join_field": {
"name": "t_article_contents",
"parent": 1
}
}
使用 RestHighLevel
<!--elasticsearch start-->
<!-- https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>6.3.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>6.3.2</version>
</dependency>
<!--elasticsearch end-->
/**
* 构建查询信息
* @return
*/
public void buildSearchPage(ModelAndView mv,Long categoryId,String searchKey) throws IOException {
RestClientBuilder restClient = RestClient.builder(
new HttpHost("localhost", 9200, "http"));
RestHighLevelClient client = new RestHighLevelClient(restClient);
// 创建并设置SearchSourceBuilder对象
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
BoolQueryBuilder filterQueryBuilder = new BoolQueryBuilder();
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("category_id",categoryId);
filterQueryBuilder.must(termQueryBuilder);
filterQueryBuilder.should(QueryBuilders.matchQuery("title",searchKey));
filterQueryBuilder.should(QueryBuilders.matchQuery("summary",searchKey));
HasChildQueryBuilder hasChildQueryBuilder = new HasChildQueryBuilder(
"t_article_contents",QueryBuilders.matchQuery("contents",searchKey), ScoreMode.None);
filterQueryBuilder.should(hasChildQueryBuilder);
boolQueryBuilder.filter(filterQueryBuilder);
searchSourceBuilder.query(boolQueryBuilder);
searchSourceBuilder.from(0);
// 每页多少条数据
searchSourceBuilder.size(1000);
// 设置排序规则
searchSourceBuilder.sort("publish_time", SortOrder.DESC);
// 设置超时时间为2s
searchSourceBuilder.timeout(new TimeValue(2000));
// 创建并设置SearchRequest对象
SearchRequest searchRequest = new SearchRequest();
// 设置request要搜索的索引和类型
searchRequest.indices("blog").types("t_article");
searchRequest.source(searchSourceBuilder);
SearchResponse searchResponse = client.search(searchRequest);
System.out.printf(searchResponse.toString());
client.close();
}
转成DSL
{
"from": 0,
"size": 1000,
"timeout": "2000ms",
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"term": {
"category_id": {
"value": 1,
"boost": 1.0
}
}
}
],
"should": [
{
"match": {
"title": {
"query": "山",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
}
},
{
"match": {
"summary": {
"query": "山",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
}
},
{
"has_child": {
"query": {
"match": {
"contents": {
"query": "山",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
}
},
"type": "t_article_contents",
"score_mode": "none",
"min_children": 0,
"max_children": 2147483647,
"ignore_unmapped": false,
"boost": 1.0
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"sort": [
{
"publish_time": {
"order": "desc"
}
}
]
}
可以使用inner_hits在查询父数据同时带出子数据(或子带出父)。
inner_hits默认只能查询3条数据,需要查询更多要自己设置from和size
{
"has_child": {
"query": {
"match": {
"contents": {
"query": "山",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
}
},
"inner_hits":{},
"type": "t_article_contents",
"score_mode": "none",
"min_children": 0,
"max_children": 2147483647,
"ignore_unmapped": false,
"boost": 1.0
}
}
logstash导入es关联关系
input {
jdbc {
jdbc_driver_library => "/usr/local/logstash-6.3.2/lib/mysql-connector-java-5.1.29.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/blog"
jdbc_user => "root"
jdbc_password => "root#$%123"
# statement_filepath => "filename.sql"
statement => "SELECT * from t_article where test_time> :sql_last_value order by test_time asc"
jdbc_paging_enabled => "true"
jdbc_page_size => "10"
jdbc_default_timezone =>"Asia/Shanghai"
type => "t_article"
tracking_column_type => "timestamp"
use_column_value => true
tracking_column => "test_time"
record_last_run => true
last_run_metadata_path => "/usr/local/logstash-6.3.2/config/last_run_metadata.txt"
schedule => "* * * * *"
}
jdbc {
jdbc_driver_library => "/usr/local/logstash-6.3.2/lib/mysql-connector-java-5.1.29.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/blog"
jdbc_user => "root"
jdbc_password => "root#$%123"
# statement_filepath => "filename.sql"
statement => "SELECT * from t_article_contents where test_time> :sql_last_value order by test_time asc"
jdbc_paging_enabled => "true"
jdbc_page_size => "10"
jdbc_default_timezone =>"Asia/Shanghai"
type => "t_article_contents"
tracking_column_type => "timestamp"
use_column_value => true
tracking_column => "test_time"
record_last_run => true
last_run_metadata_path => "/usr/local/logstash-6.3.2/config/last_run_content_metadata.txt"
schedule => "* * * * *"
}
}
filter {
if [type] == "t_article" {
ruby{
code => '
event.set("test333333333333",111);
field = {
"todo_id" => event.get("test_time")
}
join_field = {
"name": "t_article"
}
event.set("join_field",join_field)
'
}
json {
source => "message"
remove_field => ["message"]
}
json {
source => "{\"test\" : \"test1\"}"
target =>"aaaaaa"
}
mutate {
add_field => {
"join_field11" => "%{aaaaaa}"
"join_field22" => "%{test_time}"
}
}
}
if [type] == "t_article_contents" {
ruby {
code =>'
join_field = {
"name" => "t_article_contents","parent" => "%{article_id}"
}
event.set("join_field",join_field)
'
}
json {
source => "message"
remove_field => ["message"]
}
}
}
output {
if [type] == "t_article" {
elasticsearch {
hosts => ["39.108.231.144:9200"]
document_type => "t_article"
index => "blog"
document_id => "%{id}"
}
}
if [type] == "t_article_contents" {
elasticsearch {
hosts => ["39.108.231.144:9200"]
document_type => "t_article"
index => "blog"
document_id => "%{id}"
routing => "%{article_id}"
}
}
}
删除查询的数据
post http://www.582466.top:9200/blog/_delete_by_query
{
"query": {
"match_all": {}
}
}
方案三:嵌套json
不管是修改t_article还是修改t_article_contents都要全部取出来修改。
从mysql导入es的时候要在两张表的数据,整成嵌套json,可以使用插件logstash-filter-aggregate