文章目录

1. 构建json,添加数据分片

ETL异构数据源Datax_使用数据分片提升同步速度_05_oracle

{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "oraclereader",
"parameter": {
"column": [
"IDNO",
"COL1",
"COL2",
"COL3",
"DT",
"COL5",
"COL6",
"COL7",
"COL8",
"COL9",
"COL10"
],
"splitPk": "IDNO",
"connection": [
{
"jdbcUrl": [
"jdbc:oracle:thin:@192.xxx.xxx.xxx:1521:orcl"
],
"table": [
"TEST.OTBS1"
]
}
],
"username": "username",
"password": "password"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": [
"IDNO",
"COL1",
"COL2",
"COL3",
"DT",
"COL5",
"COL6",
"COL7",
"COL8",
"COL9",
"COL10"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://127.0.0.1:3306/datax?autoReconnect=true&useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=CONVERT_TO_NULL&useSSL=false&serverTimezone=CTT&nullCatalogMeansCurrent=true",
"table": [
"otbs1"
]
}
],
"username": "root",
"password": "123456"
}
}
}
]
}
}
2. Mysql数据清除

清除MySQL中otbs1表的数据

truncate table otbs1;

3. 数据分片前后对比

数据分片前

2021-06-23 12:28:12.390 [job-0] INFO  StandAloneJobContainerCommunicator - Total 1048576 records, 69143488 bytes | Speed 1.65MB/s, 26214 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 26.038s |  All Task WaitReaderTime 8.483s | Percentage 100.00%
2021-06-23 12:28:12.402 [job-0] INFO JobContainer -
任务启动时刻 : 2021-06-23 12:27:31
任务结束时刻 : 2021-06-23 12:28:12
任务总计耗时 : 41s
任务平均流量 : 1.65MB/s
记录写入速度 : 26214rec/s
读出记录总数 : 1048576
读写失败总数 : 0

数据分片后

2021-06-23 12:59:01.629 [job-0] INFO  JobContainer - 
任务启动时刻 : 2021-06-23 12:58:29
任务结束时刻 : 2021-06-23 12:59:01
任务总计耗时 : 31s
任务平均流量 : 2.20MB/s
记录写入速度 : 34952rec/s
读出记录总数 : 1048576
读写失败总数 : 0

同步耗时相比数据分片前缩短了10s(41s → 31s),平均流量由1.65MB/s提升到2.20MB/s。

从同步日志可以看到,相比数据分片前,配置splitPk后读取任务被切分为16个子任务,由3个channel并发处理。

channel并发3个未生效

ETL异构数据源Datax_使用数据分片提升同步速度_05_mysql_02

2021-06-23 12:58:31.020 [job-0] INFO  JobContainer - jobContainer starts to do prepare ...
2021-06-23 12:58:31.020 [job-0] INFO JobContainer - DataX Reader.Job [oraclereader] do prepare work .
2021-06-23 12:58:31.020 [job-0] INFO JobContainer - DataX Writer.Job [mysqlwriter] do prepare work .
2021-06-23 12:58:31.021 [job-0] INFO JobContainer - jobContainer starts to do split ...
2021-06-23 12:58:31.021 [job-0] INFO JobContainer - Job set Channel-Number to 3 channels.
2021-06-23 12:58:31.113 [job-0] INFO SingleTableSplitUtil - split pk [sql=SELECT * FROM ( SELECT IDNO FROM TEST.OTBS1 SAMPLE (0.1) WHERE (IDNO IS NOT NULL) ORDER BY DBMS_RANDOM.VALUE) WHERE ROWNUM <= 15 ORDER by IDNO ASC] is running...
2021-06-23 12:58:31.389 [job-0] INFO SingleTableSplitUtil - After split(), allQuerySql=[
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (42075 <= IDNO AND IDNO < 77408)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (77408 <= IDNO AND IDNO < 187833)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (187833 <= IDNO AND IDNO < 263631)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (263631 <= IDNO AND IDNO < 349253)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (349253 <= IDNO AND IDNO < 364994)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (364994 <= IDNO AND IDNO < 434398)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (434398 <= IDNO AND IDNO < 437250)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (437250 <= IDNO AND IDNO < 516705)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (516705 <= IDNO AND IDNO < 555961)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (555961 <= IDNO AND IDNO < 578695)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (578695 <= IDNO AND IDNO < 638120)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (638120 <= IDNO AND IDNO < 655685)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (655685 <= IDNO AND IDNO < 859873)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where (859873 <= IDNO AND IDNO <= 962533)
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where ((IDNO < 42075) OR (962533 < IDNO))
select IDNO,COL1,COL2,COL3,DT,COL5,COL6,COL7,COL8,COL9,COL10 from TEST.OTBS1 where IDNO IS NULL
].
2021-06-23 12:58:31.390 [job-0] INFO JobContainer - DataX Reader.Job [oraclereader] splits to [16] tasks.
2021-06-23 12:58:31.394 [job-0] INFO JobContainer - DataX Writer.Job [mysqlwriter] splits to [16] tasks.
2021-06-23 12:58:31.431 [job-0] INFO JobContainer - jobContainer starts to do schedule ...
2021-06-23 12:58:31.460 [job-0] INFO JobContainer - Scheduler starts [1] taskGroups.
2021-06-23 12:58:31.463 [job-0] INFO JobContainer - Running by standalone Mode.
2021-06-23 12:58:31.487 [taskGroup-0] INFO TaskGroupContainer - taskGroupId=[0] start [3] channels for [16] tasks.
2021-06-23 12:58:31.508 [taskGroup-0] INFO Channel - Channel set byte_speed_limit to -1, No bps activated.
2021-06-23 12:58:31.508 [taskGroup-0] INFO Channel - Channel set