flinkcdc-mysql-doris
本项目利用 Flink CDC 读取 MySQL 的 binlog 日志并同步到 Doris,实现业务数据到 Doris 的实时同步。支持根据 MySQL 表结构自动在 Doris 中创建表,并自动同步表结构变更;数据支持全量和增量拉取。Flink 版本为 1.16.0。
gitee地址
https://gitee.com/lxp4352/flinkcdc-mysql-doris
启动类
package com.ztorn;
import com.ztorn.cdc.CDCBuilder;
import com.ztorn.cdc.CDCBuilderFactory;
import com.ztorn.cdc.SinkBuilder;
import com.ztorn.cdc.SinkBuilderFactory;
import com.ztorn.common.assertion.Asserts;
import com.ztorn.common.model.FlinkCDCConfig;
import com.ztorn.common.model.Schema;
import com.ztorn.driver.DriverSchema;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.*;
@Slf4j
public class ZtornCDCApplication {

    private static final String METADATA_TYPE = "MySql";

    /**
     * Local demo entry point: builds a Flink CDC pipeline that reads the MySQL
     * binlog and forwards the change stream into Doris via the configured
     * {@link SinkBuilder}.
     *
     * @param args unused command-line arguments
     * @throws Exception if the Flink job cannot be built or executed
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        FlinkCDCConfig config = buildConfig();

        if (Asserts.isNotNull(config.getParallelism())) {
            env.setParallelism(config.getParallelism());
            log.info("Set parallelism: {}", config.getParallelism());
        }
        if (Asserts.isNotNull(config.getCheckpoint())) {
            configureCheckpointing(env, config.getCheckpoint());
        }

        CDCBuilder cdcBuilder = CDCBuilderFactory.buildCDCBuilder(config);

        // Resolve the schemas/tables to capture and push them back into the config
        // so both source and sink builders agree on the table list.
        List<Schema> schemaList = new ArrayList<>();
        final List<String> schemaTableNameList = DriverSchema.getSchemaTableNameList(schemaList, config, cdcBuilder);
        config.setSchemaList(schemaList);
        config.setSchemaTableNameList(schemaTableNameList);
        config.setSchemaFieldName(cdcBuilder.getSchemaFieldName());

        DataStreamSource<String> streamSource = cdcBuilder.build(env);
        SinkBuilder sinkBuilder = SinkBuilderFactory.buildSinkBuilder(config);
        // SinkBuilder.build is declared with a raw DataStreamSource; the element
        // type is sink-specific, so use a wildcard instead of a raw type here.
        DataStreamSource<?> dataSink = sinkBuilder.build(cdcBuilder, env, streamSource);
        dataSink.print();
        env.execute();
    }

    /**
     * Assembles the demo source/sink/split configuration.
     * All endpoints and credentials below are local-test placeholders.
     */
    private static FlinkCDCConfig buildConfig() {
        Map<String, String> source = new HashMap<>();
        Map<String, String> sink = new HashMap<>();
        Map<String, String> splitConfig = new HashMap<>();

        splitConfig.put("enable", "true");
        splitConfig.put("match_number_regex", "Database");
        splitConfig.put("match_way", "NAME");

        // Startup offset must be epoch MILLIseconds; the previous value
        // (1691049645) was epoch seconds, which Flink CDC would read as
        // Jan 1970. 1691049645000 == 2023-08-03 UTC.
        source.put("scan.startup.timestampMillis", "1691049645000");
        // MySQL CDC documents server-time-zone values such as "+08:00" or
        // "Asia/Shanghai"; use the canonical offset form for UTC+8.
        source.put("server-time-zone", "+08:00");

        sink.put("serviceUrl", "pulsar://localhost:8080");
        sink.put("adminUrl", "pulsar://localhost:8080");
        sink.put("topics", "persistent://flink/flink/track_flink_2");
        sink.put("connector", "datastream-doris");
        sink.put("fenodes", "192.168.0.1:8030");
        sink.put("doris.ip", "192.168.0.1");
        sink.put("doris.port", "9030");
        sink.put("username", "username");
        sink.put("password", "password");
        sink.put("sink.properties.format", "json");
        sink.put("sink.properties.strip_outer_array", "false");
        sink.put("auto.create", "true");
        // ${schemaName} is substituted per-record so each source database maps
        // to the Doris database of the same name.
        sink.put("sink.db", "${schemaName}");
        sink.put("url", "jdbc:mysql://192.168.0.1:9030");
        sink.put("sink-connector", "Doris");

        return new FlinkCDCConfig(METADATA_TYPE, "192.168.0.1", 3306, "username",
                "password", 10000, 1, "database", "schema",
                "database\\.tablename",
                "initial",
                splitConfig, null, source, sink, null, true, true);
    }

    /**
     * Enables exactly-once, unaligned checkpointing with checkpoints retained
     * on cancellation. The file:// storage path is for local runs only.
     *
     * @param env      the stream environment to configure
     * @param interval checkpoint interval in milliseconds
     */
    private static void configureCheckpointing(StreamExecutionEnvironment env, long interval) {
        env.enableCheckpointing(interval);
        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // setExternalizedCheckpointCleanup replaces the deprecated
        // enableExternalizedCheckpoints(ExternalizedCheckpointCleanup).
        checkpointConfig.setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        checkpointConfig.enableUnalignedCheckpoints();
        // NOTE(review): hard-coded Windows path — works only for local runs;
        // point this at durable storage (e.g. HDFS/S3) before deploying.
        checkpointConfig.setCheckpointStorage("file:///D:/F/checkpoint/");
        log.info("Set checkpoint: {}", interval);
    }

    // NOTE(review): unused empty method; kept to avoid breaking any external
    // callers — consider removing if nothing references it.
    public void getTable() {
    }
}