最近在处理一些clickhouse数据,其实感觉和mysql大差不差,但有几点遇到的问题记录一下。
一、准备工作
- 新建表
create table product(
_id String,
productInfo String,
dt Date
) engine = ReplacingMergeTree PARTITION BY dt
PRIMARY KEY (_id, dt)
ORDER BY (_id, dt)
SETTINGS index_granularity = 8192;
- 注:productInfo字段类型为必填!
- 添加数据
INSERT INTO product (_id,productInfo,dt) VALUES
('1','[{''productCode'': ''1'', ''productName'': ''羽绒服''}, {''productCode'': ''2'', ''productName'': ''棉袄''}, {''productCode'': ''3'', ''productName'': ''牛仔裤''}, {''productCode'': ''4'', ''productName'': ''衬衣''}, {''productCode'': ''5'', ''productName'': ''帽子''}]','2023-11-12'),
('1','[{''productCode'': ''2'', ''productName'': ''棉袄''}, {''productCode'': ''3'', ''productName'': ''牛仔裤''}, {''productCode'': ''4'', ''productName'': ''衬衣''}, {''productCode'': ''5'', ''productName'': ''帽子''}]','2023-11-13'),
('2','[{''productCode'': ''1'', ''productName'': ''羽绒服''}, {''productCode'': ''2'', ''productName'': ''棉袄''}, {''productCode'': ''3'', ''productName'': ''牛仔裤''}, {''productCode'': ''5'', ''productName'': ''帽子''}]','2023-11-13'),
('3','[{''productCode'': ''2'', ''productName'': ''棉袄''}, {''productCode'': ''4'', ''productName'': ''衬衣''}, {''productCode'': ''5'', ''productName'': ''帽子''}]','2023-11-13'),
('4','[{''productCode'': ''3'', ''productName'': ''牛仔裤''}, {''productCode'': ''4'', ''productName'': ''衬衣''}, {''productCode'': ''5'', ''productName'': ''帽子''}]','2023-11-13'),
('5','[{''productCode'': ''1'', ''productName'': ''羽绒服''}, {''productCode'': ''5'', ''productName'': ''帽子''}]','2023-11-13');
二、数据去重
使用argMax()函数
select
_id,
argMax(productInfo, dt),
Max(dt)
from
product
group by _id
order by _id;
返回结果:
可以看到结果是取得dt最新4条数据。
三、处理JSONArray字符串
此时我们想要查询结果中包含商品编号为1或3的数据,sql如下:
select
_id,
productInfo,
replace(productInfo, '\'', '\"') as new,
JSONExtractArrayRaw(new) as arr,
arrayJoin(arr) as json,
visitParamExtractString(json, 'productCode') as productCode,
dt
from product
where productCode in ('1', '3');
通过这一系列操作可以得到包含商品编号为1,3的数据为(1,2,4,5)
再结合上述去重代码
select
_id,
argMax(productInfo, dt),
MAX(dt)
from
product
where visitParamExtractString(arrayJoin(JSONExtractArrayRaw(replace(productInfo, '\'', '\"'))), 'productCode') in ('1', '3')
group by _id
order by _id;