flinkcdc-mysql-doris uses Flink CDC to read the MySQL binlog and synchronize the changes into Doris, giving you real-time replication of business data. The project automatically creates the corresponding tables in Doris from the MySQL table structures and automatically syncs schema changes. Data can be pulled in both full-snapshot and incremental modes. The Flink version is 1.16.0.
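For orientation, the snippet below is a minimal sketch of what the project's CDCBuilder presumably assembles internally: a MySqlSource from flink-connector-mysql-cdc that takes a full snapshot first and then tails the binlog, emitting each change event as a JSON string. The host, credentials, and table list are placeholders, and the project's actual wiring may differ.

import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MySqlCdcSketch {
    public static void main(String[] args) throws Exception {
        MySqlSource<String> source = MySqlSource.<String>builder()
                .hostname("192.168.0.1")                  // placeholder MySQL host
                .port(3306)
                .databaseList("database")                 // databases to capture
                .tableList("database.tablename")          // table regex, as in the config below
                .username("username")
                .password("password")
                .serverTimeZone("UTC+8")
                // "initial" = full snapshot first, then incremental binlog reading
                .startupOptions(StartupOptions.initial())
                .deserializer(new JsonDebeziumDeserializationSchema()) // change events as JSON strings
                .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromSource(source, WatermarkStrategy.noWatermarks(), "MySQL CDC Source").print();
        env.execute("mysql-cdc-sketch");
    }
}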

Gitee repository

https://gitee.com/lxp4352/flinkcdc-mysql-doris

Startup class

package com.ztorn;

import com.ztorn.cdc.CDCBuilder;
import com.ztorn.cdc.CDCBuilderFactory;
import com.ztorn.cdc.SinkBuilder;
import com.ztorn.cdc.SinkBuilderFactory;
import com.ztorn.common.assertion.Asserts;
import com.ztorn.common.model.FlinkCDCConfig;
import com.ztorn.common.model.Schema;
import com.ztorn.driver.DriverSchema;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.*;

@Slf4j
public class ZtornCDCApplication {


    private static final String METADATA_TYPE = "MySql";

    // Entry point for running locally
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Map<String, String> source = new HashMap<>();      // CDC source options
        Map<String, String> sink = new HashMap<>();        // sink options
        Map<String, String> splitConfig = new HashMap<>(); // project-specific table split/matching options
        List<Schema> schemaList = new ArrayList<>();
        splitConfig.put("enable", "true");
        splitConfig.put("match_number_regex", "Database");
        splitConfig.put("match_way", "NAME");
        source.put("scan.startup.timestampMillis","1691049645");
        source.put("server-time-zone","UTC+8");
        sink.put("serviceUrl","pulsar://localhost:8080");
        sink.put("adminUrl","pulsar://localhost:8080");
        sink.put("topics","persistent://flink/flink/track_flink_2");
        sink.put("connector","datastream-doris");
        sink.put("fenodes","192.168.0.1:8030");
        sink.put("doris.ip","192.168.0.1");
        sink.put("doris.port","9030");
        sink.put("username","username");
        sink.put("password","password");
        sink.put("sink.properties.format","json");
        sink.put("sink.properties.strip_outer_array","false");
        sink.put("auto.create","true");
        sink.put("sink.db","${schemaName}");
        sink.put("url","jdbc:mysql://192.168.0.1:9030");
        sink.put("sink-connector","Doris");
        // Positional args, as used below: metadata type, host, port, user, password,
        // checkpoint interval (ms), parallelism, database, schema, table regex,
        // startup mode ("initial" = full snapshot + incremental binlog), split config, source/sink options
        FlinkCDCConfig config = new FlinkCDCConfig(METADATA_TYPE, "192.168.0.1", 3306, "username",
                "password", 10000, 1, "database", "schema",
                "database\\.tablename",
                "initial",
                splitConfig, null, source, sink, null, true, true);
        if (Asserts.isNotNull(config.getParallelism())) {
            env.setParallelism(config.getParallelism());
            log.info("Set parallelism: {}", config.getParallelism());
        }
        if (Asserts.isNotNull(config.getCheckpoint())) {
            env.enableCheckpointing(config.getCheckpoint());
            CheckpointConfig checkpointConfig = env.getCheckpointConfig();
            checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
            // Retain externalized checkpoints so the job can be restored after cancellation
            checkpointConfig.setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
            checkpointConfig.enableUnalignedCheckpoints();
            // Local checkpoint directory (Windows path); adjust for your environment
            checkpointConfig.setCheckpointStorage("file:///D:/F/checkpoint/");
            log.info("Set checkpoint interval: {}", config.getCheckpoint());
        }
        // Build the CDC source and resolve the schemas/tables to capture
        CDCBuilder cdcBuilder = CDCBuilderFactory.buildCDCBuilder(config);
        final List<String> schemaTableNameList = DriverSchema.getSchemaTableNameList(schemaList, config, cdcBuilder);
        config.setSchemaList(schemaList);
        config.setSchemaTableNameList(schemaTableNameList);
        config.setSchemaFieldName(cdcBuilder.getSchemaFieldName());
        DataStreamSource<String> streamSource = cdcBuilder.build(env);
        // Uncomment to inspect the raw change stream without writing to the sink:
//        streamSource.print();
//        env.execute();
        // Build the sink from the config and attach it to the change stream
        SinkBuilder sinkBuilder = SinkBuilderFactory.buildSinkBuilder(config);
        DataStreamSource dataSink = sinkBuilder.build(cdcBuilder, env, streamSource);
        dataSink.print();
        env.execute();
    }

}
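On the sink side, SinkBuilderFactory with sink-connector set to Doris presumably delegates to the official flink-doris-connector. Below is a hedged sketch of building such a sink with that connector's DataStream API; the table identifier, label prefix, and addresses are placeholders, and package names (e.g. SimpleStringSerializer) shift slightly between connector versions.

import java.util.Properties;
import org.apache.doris.flink.cfg.DorisExecutionOptions;
import org.apache.doris.flink.cfg.DorisOptions;
import org.apache.doris.flink.cfg.DorisReadOptions;
import org.apache.doris.flink.sink.DorisSink;
import org.apache.doris.flink.sink.writer.SimpleStringSerializer;

public class DorisSinkSketch {

    // Builds a DorisSink that Stream-Loads JSON rows into database.tablename
    static DorisSink<String> buildDorisSink() {
        Properties streamLoadProps = new Properties();
        streamLoadProps.setProperty("format", "json");        // matches sink.properties.format above
        streamLoadProps.setProperty("read_json_by_line", "true");

        return DorisSink.<String>builder()
                .setDorisOptions(DorisOptions.builder()
                        .setFenodes("192.168.0.1:8030")       // Doris FE HTTP address
                        .setTableIdentifier("database.tablename")
                        .setUsername("username")
                        .setPassword("password")
                        .build())
                .setDorisReadOptions(DorisReadOptions.builder().build())
                .setDorisExecutionOptions(DorisExecutionOptions.builder()
                        .setLabelPrefix("flinkcdc-doris")     // Stream Load label prefix; keep unique per job
                        .setStreamLoadProp(streamLoadProps)
                        .build())
                .setSerializer(new SimpleStringSerializer())  // pass JSON strings through unchanged
                .build();
    }
}

In the startup class above, such a sink would be attached to the change stream with streamSource.sinkTo(buildDorisSink()).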