文章目录
- 系列文章目录
- 前言
- 一、实现思路
- 二、测试案例
- 1. 建表
- 2. 编写一提到的代码
- 三、测试
- 1.mysql,目前已经有的测试数据
- 2. 往生产者增加一条20220617的数据
- 3.分析binglog
- 4. 发送一条在b,20220617,发现结果表20220617的pv变为2
- 5.分析mysql binglog:
- 总结
前言
续上一课,计算一个PV的,案例,最终把结果更新到MYSQL
本文测试如下语法是否增量更新
insert into default_catalog.default_database.mysql_pv select dt, cast(count() as int) as pv from hive_iceberg_catalog.ods_base.IcebergSink_XXZH /+ OPTIONS(‘streaming’=‘true’, ‘monitor-interval’=‘1s’)*/ where dt is not null group by dt;
一、实现思路
思路如上图,从左到右,数据从kafka流入,pv统计值,从db查看
二、测试案例
1. 建表
DROP TABLE IF EXISTS `mysql_pv`;
CREATE TABLE `mysql_pv` (
`dt` int(255) NOT NULL,
`pv` int(255) DEFAULT NULL,
PRIMARY KEY (`dt`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
2. 编写一提到的代码
CREATE TABLE IF NOT EXISTS KafkaTableSource_XXZH (
`log` STRING,
`dt` STRING
) WITH (
'connector' = 'kafka',
'topic' = 'test_xxzh',
'properties.bootstrap.servers' = 'hadoop101:9092,hadoop102:9092,hadoop103:9092',
'' = 'testGroup',
'scan.startup.mode' = 'earliest-offset',
'csv.ignore-parse-errors'='true',
'format' = 'csv'
);
CREATE TABLE if not exists mysql_pv(
`dt` INT NOT NULL,
`pv` INT NOT NULL,
PRIMARY KEY(dt) NOT ENFORCED
) with(
'connector' = 'jdbc',
'url' = 'jdbc:mysql://hadoop103:3306/xxzh_stock',
'username' = 'hive',
'password' = '123456',
'table-name' = 'mysql_pv'
);
CREATE CATALOG hive_iceberg_catalog WITH (
'type'='iceberg',
'catalog-type'='hive',
'uri'='thrift://hadoop101:9083',
'clients'='5',
'property-version'='1',
'warehouse'='hdfs:///user/hive/warehouse/hive_iceberg_catalog'
);
use catalog hive_iceberg_catalog;
CREATE TABLE IF NOT EXISTS ods_base.IcebergSink_XXZH (
`log` STRING,
`dt` INT
)with(
'write.metadata.delete-after-commit.enabled'='true',
'write.metadata.previous-versions-max'='5',
'format-version'='2'
);
insert into hive_iceberg_catalog.ods_base.IcebergSink_XXZH select log, cast(dt as int) as dt from default_catalog.default_database.KafkaTableSource_XXZH;
insert into default_catalog.default_database.mysql_pv select dt, cast(count(*) as int) as pv from hive_iceberg_catalog.ods_base.IcebergSink_XXZH /*+ OPTIONS('streaming'='true', 'monitor-interval'='1s')*/ where dt is not null group by dt;
三、测试
1.mysql,目前已经有的测试数据
2. 往生产者增加一条20220617的数据
a,20220617
发现: mysql结果表: 增加了一条
3.分析binglog
[root@hadoop103 mysql]# mysqlbinlog -v --base64-output=decode-rows mysql-bin.000004 |tail -n 100
BEGIN
/*!*/;
# at 28356
#220617 14:11:14 server id 1 end_log_pos 28414 CRC32 0xfcdda6c9 Table_map: `xxzh_stock`.`mysql_pv` mapped to number 181
# at 28414
#220617 14:11:14 server id 1 end_log_pos 28458 CRC32 0xb0c17384 Write_rows: table id 181 flags: STMT_END_F
### INSERT INTO `xxzh_stock`.`mysql_pv`
### SET
### @1=20220617
### @2=1
# at 28458
#220617 14:11:14 server id 1 end_log_pos 28489 CRC32 0xdb5dca0c Xid = 17793262
COMMIT/*!*/;
SET @@SESSION.GTID_NEXT= 'AUTOMATIC' /* added by mysqlbinlog */ /*!*/;
DELIMITER ;
# End of log file
发现插入了一条 @1=20220617, @2=1
4. 发送一条在b,20220617,发现结果表20220617的pv变为2
5.分析mysql binglog:
看binlog命令:
[root@hadoop103 mysql]# mysqlbinlog -v --base64-output=decode-rows mysql-bin.000004 |tail -n 100
### INSERT INTO `xxzh_stock`.`mysql_pv`
### SET
### @1=20220617
### @2=1
# at 28458
#220617 14:11:14 server id 1 end_log_pos 28489 CRC32 0xdb5dca0c Xid = 17793262
COMMIT/*!*/;
# at 28489
#220617 14:13:25 server id 1 end_log_pos 28554 CRC32 0xadfe9011 Anonymous_GTID last_committed=101 sequence_number=102 rbr_only=yes
/*!50718 SET TRANSACTION ISOLATION LEVEL READ COMMITTED*//*!*/;
SET @@SESSION.GTID_NEXT= 'ANONYMOUS'/*!*/;
# at 28554
#220617 14:13:25 server id 1 end_log_pos 28632 CRC32 0xd07997a1 Query thread_id=588 exec_time=0 error_code=0
SET TIMESTAMP=1655446405/*!*/;
BEGIN
/*!*/;
# at 28632
#220617 14:13:25 server id 1 end_log_pos 28690 CRC32 0x75154cc2 Table_map: `xxzh_stock`.`mysql_pv` mapped to number 181
# at 28690
#220617 14:13:25 server id 1 end_log_pos 28744 CRC32 0xf4ff361c Update_rows: table id 181 flags: STMT_END_F
### UPDATE `xxzh_stock`.`mysql_pv`
### WHERE
### @1=20220617
### @2=1
### SET
### @1=20220617
### @2=2
# at 28744
#220617 14:13:25 server id 1 end_log_pos 28775 CRC32 0xfe2b874e Xid = 17799053
COMMIT/*!*/;
SET @@SESSION.GTID_NEXT= 'AUTOMATIC' /* added by mysqlbinlog */ /*!*/;
DELIMITER ;
# End of log file
发现只是对增量的数据进行update
总结
ICEBERG的更新,是增量更新的,旧的结果如果没有发生变更,就不会产生更新