初探ES date类型的时区机制


文章目录

  • 初探ES date类型的时区机制
  • 验证场景
  • date字段类型指定唯一字符串的日期格式(非默认)
  • date类型采用默认的格式
  • date类型采用混合时间格式
  • `time_zone`参数对写入时 是否有作用
  • 小结



ES默认date类型的 format格式为:

strict_date_optional_time||epoch_millis

时间范围查询示例

gt:大于

gte:大于等于

lt:小于

lte:小于等于

GET _search
{
    "query": {
        "range" : {
            "date" : {
                "gte": "01/01/2012",
                "lte": "2013",
                "format": "dd/MM/yyyy||yyyy"
            }
        }
    }
}

验证场景

date字段类型指定唯一字符串的日期格式(非默认)

# 创建索引-指定单一的日期格式,不指定时区。
PUT gudongtest001
{
    "settings": {
        "number_of_replicas": 0
    },
    "mappings": {
        "_doc": {
            "properties": {
                "date": {
                    "type": "date",
                    "format": "yyyy-MM-dd HH:mm:ss",
                    "store": true
                },
                "message": {
                    "type": "keyword"
                }
            }
        }
    }
}

# 写入数据

PUT gudongtest001/_doc/1
{ "date": "2023-02-28 00:01:10","message":"trying out Elasticsearch" } 

PUT gudongtest001/_doc/2
{ "date": "2023-02-28 09:01:10","message":"trying out Elasticsearch" } 

PUT gudongtest001/_doc/3
{ "date": "2023-02-28 18:01:10","message":"trying out Elasticsearch" } 

# 不满足日期格式的数据无法写入(这里是时间戳字符串,与映射中的 yyyy-MM-dd HH:mm:ss 不匹配)
PUT gudongtest001/_doc/4
{ "date": "1677722733000","message":"trying out Elasticsearch" } 

# 查询
# 正常查询与显示
GET gudongtest001/_search
{
    "query": {
        "range" : {
            "date" : {
                "gte" : "2023-02-28 00:01:10"
            }
        }
    }
}

# 查询时通过 format 指定查询条件的格式:1677513670000 在东八区对应的时间为 2023-02-28 00:01:10
GET gudongtest001/_search
{
    "query": {
        "range" : {
            "date" : {
                "gte" : "1677513670000",
                "format": "epoch_millis"
            }
        }
    }
}

# 以上查询数据是没问题的

date类型采用默认的格式

PUT gudongtest002
{
    "settings": {
        "number_of_replicas": 0
    },
    "mappings": {
        "_doc": {
            "_all": {
                "enabled": false
            },
            "properties": {
                "date": {
                    "type": "date",
                    "store": true
                },
                "message": {
                    "type": "keyword"
                }
            }
        }
    }
}

# 写入数据
# 默认格式(strict_date_optional_time||epoch_millis)下,不符合该格式的字符串(如下面的 "2023-02-28 00:01:10")无法写入
PUT gudongtest002/_doc/1
{ "date": "2023-02-28 00:01:10","message":"trying out Elasticsearch" } 

PUT gudongtest002/_doc/1
{ "date": "1677513670000","message":"trying out Elasticsearch" } 

PUT gudongtest002/_doc/2
{ "date": "1677546070000","message":"trying out Elasticsearch" } 

PUT gudongtest002/_doc/3
{ "date": "1677578470000","message":"trying out Elasticsearch" } 

# 查询数据
GET gudongtest002/_search
{
    "query": {
        "range" : {
            "date" : {
                "gte" : "1677513670000"
            }
        }
    }
}

# 查询数据 id:1的记录没有查询到
GET gudongtest002/_search
{
    "query": {
        "range" : {
            "date" : {
                "gte" : "2023-02-28 00:01:10",
                 "format": "yyyy-MM-dd HH:mm:ss"
            }
        }
    }
}

# 查看数据存储情况
GET gudongtest002/_search
{
    "stored_fields": [
        "date"
    ],
    "query": {
        "match_all": {}
    }
}

## id:2 date:"2023-02-28T01:01:10.000Z"  1677546070000
## id:1 date:"2023-02-27T16:01:10.000Z"  1677513670000
## id:3 date:"2023-02-28T10:01:10.000Z"  1677578470000
### 可以发现:unixtime时间戳存入ES后,是按0时区(UTC)来表示的;查询条件中的时间字符串在未指定时区时同样按0时区解析。文档 id:1 在ES中的时间(UTC 2023-02-27 16:01:10)不满足查询条件,所以没有查询到。

# 查询时带上时区:time_zone 用于把查询条件中的时间字符串换算成 UTC,并不会作用于已存储的数据。
GET gudongtest002/_search
{
    "query": {
        "range" : {
            "date" : {
                "gte" : "2023-02-28 00:01:10",
                 "format": "yyyy-MM-dd HH:mm:ss",
                 "time_zone": "+08:00"
            }
        }
    }
}

date类型采用混合时间格式

PUT gudongtest003
{
    "settings": {
        "number_of_replicas": 0
    },
    "mappings": {
        "_doc": {
            "_all": {
                "enabled": false
            },
            "properties": {
                "date": {
                    "type": "date",
                    "format": "yyyy-MM-dd HH:mm:ss||epoch_millis",
                    "store": true
                },
                "message": {
                    "type": "keyword"
                }
            }
        }
    }
}

# 满足任一格式即可写入
PUT gudongtest003/_doc/1
{ "date": "2023-02-28 00:01:10","message":"trying out Elasticsearch" } 

PUT gudongtest003/_doc/2
{ "date": "1677513670000","message":"trying out Elasticsearch" } 

PUT gudongtest003/_doc/3
{ "date": "1677546070000","message":"trying out Elasticsearch" } 

PUT gudongtest003/_doc/4
{ "date": "1677578470000","message":"trying out Elasticsearch" } 


# 使用时间戳查询没问题
GET gudongtest003/_search
{

    "query": {
        "range" : {
            "date" : {
                "gte" : "1677513670000"
            }
        }
    }
}

# 使用字符串查询时会出现同样的问题:查询时需要加上 time_zone,用来指明查询条件中的时间所属的时区
GET gudongtest003/_search
{
    "stored_fields": [
        "date"
    ],
    "query": {
        "range": {
            "date": {
                "gte": "2023-02-28 00:01:10",
                "time_zone": "+08:00"
            }
        }
    }
}

# 如果查询时不带 time_zone,_id为2的记录就查询不到,可以用 _explain 查看原因
GET gudongtest003/_doc/2/_explain
{
    "query": {
        "range": {
            "date": {
                "gte": "2023-02-28 00:01:10",
                "lt": "2023-03-01 00:01:10"
            }
        }
    }
}

{
    "_index": "gudongtest003",
    "_type": "_doc",
    "_id": "2",
    "matched": false,
    "explanation": {
        "value": 0,
        "description": "date:[1677542470000 TO 1677628869999] doesn't match id 0",
        "details": []
    }
}

1677542470000(东八区 2023-02-28 08:01:10) TO 1677628869999(东八区 2023-03-01 08:01:09)
# 查询时,查询条件中的时间字符串在转换成时间戳时,采用的是0时区。而id2中date是1677513670000,小于下限1677542470000,就无法满足了。

time_zone参数对写入时 是否有作用

PUT gudongtest004
{
    "settings": {
        "number_of_replicas": 0
    },
    "mappings": {
        "_doc": {
            "_all": {
                "enabled": false
            },
            "properties": {
                "date": {
                    "type": "date",
                    "format": "yyyy-MM-dd HH:mm:ss",
                    "store": true
                },
                "message": {
                    "type": "keyword",
                    "doc_values": false
                }
            }
        }
    }
}

#写入数据
PUT gudongtest004/_doc/1
{
    "date": "2023-02-28 00:01:10",
    "message": "trying out Elasticsearch",
    "time_zone": "+08:00"
}

PUT gudongtest004/_doc/2
{
    "date": "2023-02-28 00:01:10",
    "message": "trying out Elasticsearch"
}

PUT gudongtest004/_doc/3
{ "date": "1677546070000","message":"trying out Elasticsearch" } 

PUT gudongtest004/_doc/4
{ "date": "1677546070000","message":"trying out Elasticsearch","time_zone": "+08:00" } 

# 查看数据存储情况
GET gudongtest004/_search
{
    "stored_fields": [
        "date"
    ],
    "query": {
        "match_all": {}
    }
}



## id:2 date:"2023-02-28 00:01:10"  1677546070000
## id:1 date:"2023-02-28 00:01:10"  1677513670000
## id:3 date:"2023-02-28 01:01:10"  1677546070000
## id:4 date:"2023-02-28 01:01:10"  1677546070000
### 在写入的文档中附带 "time_zone": "+08:00" 不会被当作时区参数,对 date 字段的存储没有影响(id:1 与 id:2、id:3 与 id:4 的 date 完全相同)

# 有作用的方式之一:在时间字符串中直接带上时区偏移(前提是字段的 format 能解析带偏移的写法,例如默认的 strict_date_optional_time)
POST gudongtest004/_doc/5
{
  "date":"2019-12-11T08:00:00+08:00"
}

小结

  • 创建索引的时候无法指定时区
  • 采用单一字符串格式时,写入和查询都可以忽略时区。
  • ES内部的时区是0时区
  • 写入字符串没问题;写入时间戳时,ES按0时区(UTC)表示该时间,看起来会比东八区的本地时间少八个小时(因为我们在东八区)
  • 在时间戳与字符串格式混用的场景下,查询时最好统一以时间戳作为查询条件。
  • 混用多个字符串格式没有问题,尽量不要混时间戳

彩蛋:在_ingest中结合timezone控制写入时区

PUT _ingest/pipeline/chage_utc_to_asiash
{
  "processors": [
    {
      "date" : {
        "field" : "my_time",
        "target_field": "my_time", 
        "formats" : ["yyyy-MM-dd HH:mm:ss"],
        "timezone" : "Asia/Shanghai"
      }
    }
  ]
}

PUT gudongtest005
{
    "settings": {
        "default_pipeline": "chage_utc_to_asiash",
        "number_of_replicas": 0
    },
    "mappings": {
        "_doc": {
            "properties": {
                "my_time": {
                    "type": "date"
                }
            }
        }
    }
}

PUT gudongtest005/_doc/1
{
  "my_time": "2021-08-09 08:07:16"
} 

GET gudongtest005/_search
{
    "query": {
        "match_all": {}
    }
}

 "my_time": "2021-08-09T08:07:16.000+08:00"