lightdash 与dbt集成的玩法简单说明

原创

rongfengliang 2024-04-15 09:35:39 ©著作权

文章标签 json 元数据自定义 文章分类 Python 后端开发

©著作权归作者所有：来自51CTO博客作者rongfengliang的原创作品，请联系作者获取转载授权，否则将追究法律责任

lightdash 是依赖dbt 进行建模的bi 工具，以下说明下lightdash 是如何集成dbt 的

简单操作流程

如下，主要是安装cli，预处理表，然后创建项目

lightdash 与dbt集成的玩法简单说明_元数据

内部处理简单说明

代码入口位于 packages/cli/src/index.ts，主要是通过自定义一些 dbt 的 meta 信息，然后通过包装的 dbt run 命令执行模型的创建（此处 lightdash 会解析 dbt 的 manifest.json 文件）

lightdash dbt run 处理简单说明
代码在 packages/cli/src/handlers/dbt/run.ts 中
参考处理

/**
 * Handler for `lightdash dbt run`: forwards the arguments of the parent
 * command to the `dbt` executable and, once dbt has finished successfully,
 * invokes `generateHandler` with `assumeYes` forced to `true`.
 *
 * @param options - CLI options; `verbose` toggles debug output and the whole
 *   object is passed on to `generateHandler`.
 * @param command - The commander command; its parent supplies the dbt args.
 * @throws Error when the command has no parent.
 * @throws ParseError when the dbt subprocess fails.
 */
export const dbtRunHandler = async (
    options: DbtRunHandlerOptions,
    command: Command,
) => {
    GlobalState.setVerbose(options.verbose);

    const { parent } = command;
    if (!parent) {
        throw new Error('Parent command not found');
    }

    await LightdashAnalytics.track({
        event: 'dbt_command.started',
        properties: {
            command: `${parent.args}`,
        },
    });

    // `--verbose` is consumed by this CLI and is not passed on to dbt.
    const commands = parent.args.filter((arg) => arg !== '--verbose');

    GlobalState.debug(`> Running dbt command: ${commands}`);

    try {
        // stdio is inherited so dbt output goes straight to the terminal.
        await execa('dbt', commands, { stdio: 'inherit' });
    } catch (e: unknown) {
        let msg = '-';
        if (e instanceof Error) {
            msg = e.message;
        }
        await LightdashAnalytics.track({
            event: 'dbt_command.error',
            properties: {
                command: `${commands}`,
                error: `${msg}`,
            },
        });
        throw new ParseError(`Failed to run dbt:\n  ${msg}`);
    }

    // Generation step: per the article, this parses the dbt manifest to get
    // the model information, including the compiled models (the metadata
    // produced by `dbt run` and `dbt compile` differs in places).
    await generateHandler({
        ...options,
        assumeYes: true,
        excludeMeta: options.excludeMeta,
    });
};
generateHandler 处理上边有一些简单的说明，我主要说明下核心处理

// Iterate over the compiled models and generate/update one .yml per model.
// This is an excerpt from generateHandler: compiledModels, warehouseClient,
// manifest, options, absoluteProjectPath, context and executionId come from
// the enclosing function, which is not shown here.
for await (const compiledModel of compiledModels) {
        // One spinner per model, resolved with succeed()/fail() below.
        const spinner = GlobalState.startSpinner(
            `  Generating .yml for model ${styles.bold(compiledModel.name)}`,
        );
        try {
            // Fetch the warehouse table information for this model.
            const table = await getWarehouseTableForModel({
                model: compiledModel,
                warehouseClient,
            });
            // Update the model's YAML definition from the model and the table
            // information above (packages/cli/src/dbt/models.ts); the logic is
            // fairly involved, see the source for details.
            const { updatedYml, outputFilePath } = await findAndUpdateModelYaml(
                {
                    model: compiledModel,
                    table,
                    docs: manifest.docs,
                    includeMeta: !options.excludeMeta,
                    projectDir: absoluteProjectPath,
                    projectName: context.projectName,
                    assumeYes: options.assumeYes,
                },
            );
            try {
                // Whatever getFileHeadComments returns for the target file is
                // prepended to the freshly dumped YAML when it is truthy.
                const existingHeadComments = await getFileHeadComments(
                    outputFilePath,
                );
                const ymlString = yaml.dump(updatedYml, {
                    quotingType: '"',
                });
                // Write the project schema information to disk.
                await fs.writeFile(
                    outputFilePath,
                    existingHeadComments
                        ? `${existingHeadComments}\n${ymlString}`
                        : ymlString,
                );
            } catch (e) {
                // Any failure while reading head comments, dumping or writing
                // is rethrown as a ParseError that names the output file.
                const msg = e instanceof Error ? e.message : '-';
                throw new ParseError(
                    `Failed to write file ${outputFilePath}\n ${msg}`,
                );
            }
            spinner.succeed(
                `  ${styles.bold(compiledModel.name)}${styles.info(
                    ` ➡️  ${path.relative(process.cwd(), outputFilePath)}`,
                )}`,
            );
        } catch (e: unknown) {
            // Report the failure to analytics, mark the spinner as failed and
            // rethrow, which stops processing of the remaining models.
            const msg = e instanceof Error ? e.message : '-';
            await LightdashAnalytics.track({
                event: 'generate.error',
                properties: {
                    executionId,
                    trigger: 'generate',
                    error: `${msg}`,
                },
            });
            spinner.fail(`  Failed to generate ${compiledModel.name}.yml`);
            throw e;
        }
    }
lightdash deploy --create
packages/cli/src/handlers/deploy.ts
创建完成之后需要进行部署，deployHandler 的参考处理如下
/**
 * Handler for `lightdash deploy`: compiles the dbt project into explores,
 * resolves the target project (creating one when `options.create` is set,
 * otherwise using the project from the stored config) and deploys to it.
 *
 * Returns early, without deploying, when project creation yields no project.
 *
 * @param options - Deploy options; also passed to `compile`, to
 *   `createNewProject` and, together with `projectUuid`, to `deploy`.
 * @throws AuthorizationError when no project is being created and the config
 *   has no active project or no server URL.
 */
export const deployHandler = async (options: DeployHandlerOptions) => {
    GlobalState.setVerbose(options.verbose);
    await checkLightdashVersion();
    const executionId = uuidv4();
    // Compile the model information into explores.
    const explores = await compile(options);

    const config = await getConfig();
    let projectUuid: string;

    if (options.create === undefined) {
        // Deploying to an existing project requires an active project and
        // a server URL in the config.
        if (!config.context?.project || !config.context.serverUrl) {
            throw new AuthorizationError(
                `No active Lightdash project. Run 'lightdash login --help'`,
            );
        }
        projectUuid = config.context.project;
    } else {
        const project = await createNewProject(executionId, options);
        if (!project) {
            console.error(
                "To preview your project, you'll need to manually enter your warehouse connection details.",
            );
            const createProjectUrl =
                config.context?.serverUrl &&
                new URL('/createProject', config.context.serverUrl);
            if (createProjectUrl) {
                console.error(
                    `Fill out the project connection form here: ${createProjectUrl}`,
                );
            }
            return;
        }
        projectUuid = project.projectUuid;
        await setProject(projectUuid, project.name);
    }

    // Deploy the explores to the resolved project.
    await deploy(explores, { ...options, projectUuid });

    // URL shown to the user for the deployed project.
    let displayUrl: string;
    if (options.create) {
        displayUrl = `${config.context?.serverUrl}/createProject/cli?projectUuid=${projectUuid}`;
    } else {
        displayUrl = `${config.context?.serverUrl}/projects/${projectUuid}/home`;
    }

    console.error(`${styles.bold('Successfully deployed project:')}`);
    console.error('');
    console.error(`      ${styles.bold(`⚡️ ${displayUrl}`)}`);
    console.error('');
};
compile 方法
尽管方法名字为 compile，但是实际上还是基于 dbt run 生成的元数据信息进行 check，然后转换为 Explore 实体类型

/**
 * Compiles the dbt project into Lightdash explores.
 *
 * In order, the function:
 *  1. reads the dbt version and manifest version and warns (CI) or asks for
 *     confirmation (interactive) when the dbt version is not supported;
 *  2. runs `dbtList`, `dbtCompile`, or neither, depending on the options;
 *  3. loads the dbt context, the target and a warehouse client;
 *  4. loads the manifest and keeps the listed (or compiled) models;
 *  5. validates the models, optionally fetches the warehouse catalog and
 *     attaches types to the valid models;
 *  6. converts the valid models to explores and prints a per-explore summary.
 *
 * @param options - Compile options (project/profiles dirs, profile, target,
 *   `useDbtList`, `skipDbtCompile`, `skipWarehouseCatalog`, `startOfWeek`).
 * @returns The converted explores followed by the models that failed
 *   validation.
 * @throws Error when the user declines to continue with an unsupported dbt
 *   version.
 * @throws ParseError when the manifest's dbt adapter is not supported.
 */
export const compile = async (options: CompileHandlerOptions) => {
    const dbtVersion = await getDbtVersion();
    const manifestVersion = await getDbtManifest();
    GlobalState.debug(`> dbt version ${dbtVersion}`);
    const executionId = uuidv4();
    await LightdashAnalytics.track({
        event: 'compile.started',
        properties: {
            executionId,
            dbtVersion,
            useDbtList: !!options.useDbtList,
            skipWarehouseCatalog: !!options.skipWarehouseCatalog,
            skipDbtCompile: !!options.skipDbtCompile,
        },
    });

    if (!isSupportedDbtVersion(dbtVersion)) {
        if (process.env.CI === 'true') {
            // In CI there is nobody to answer a prompt: warn and carry on.
            console.error(
                `Your dbt version ${dbtVersion} does not match our supported versions (1.3.* - 1.7.*), this could cause problems on compile or validation.`,
            );
        } else {
            const answers = await inquirer.prompt([
                {
                    type: 'confirm',
                    name: 'isConfirm',
                    message: `${styles.warning(
                        `Your dbt version ${dbtVersion} does not match our supported version (1.3.* - 1.7.*), this could cause problems on compile or validation.`,
                    )}\nDo you still want to continue?`,
                },
            ]);
            if (!answers.isConfirm) {
                throw new Error(`Unsupported dbt version ${dbtVersion}`);
            }
        }
    }

    // Skipping assumes manifest.json already exists.
    let compiledModelIds: string[] | undefined;
    if (options.useDbtList) {
        compiledModelIds = await dbtList(options);
    } else if (!options.skipDbtCompile) {
        await dbtCompile(options);
    } else {
        GlobalState.debug('> Skipping dbt compile');
    }

    const absoluteProjectPath = path.resolve(options.projectDir);
    const absoluteProfilesPath = path.resolve(options.profilesDir);

    GlobalState.debug(`> Compiling with project dir ${absoluteProjectPath}`);
    GlobalState.debug(`> Compiling with profiles dir ${absoluteProfilesPath}`);

    const context = await getDbtContext({ projectDir: absoluteProjectPath });
    const profileName = options.profile || context.profileName;
    const { target } = await loadDbtTarget({
        profilesDir: absoluteProfilesPath,
        profileName,
        targetName: options.target,
    });

    GlobalState.debug(`> Compiling with profile ${profileName}`);
    // NOTE(review): if `target` is an object this interpolates as
    // "[object Object]" — confirm against loadDbtTarget's return type.
    GlobalState.debug(`> Compiling with target ${target}`);

    const credentials = await warehouseCredentialsFromDbtTarget(target);
    const warehouseClient = warehouseClientFromCredentials({
        ...credentials,
        startOfWeek: isWeekDay(options.startOfWeek)
            ? options.startOfWeek
            : undefined,
    });
    const manifest = await loadManifest({ targetDir: context.targetDir });
    const models = getModelsFromManifest(manifest).filter((model) => {
        if (compiledModelIds) {
            return compiledModelIds.includes(model.unique_id);
        }
        // in case they skipped the compile step, we check if the models are compiled
        return model.compiled;
    });

    const adapterType = manifest.metadata.adapter_type;

    const { valid: validModels, invalid: failedExplores } =
        await validateDbtModel(adapterType, models);

    if (failedExplores.length > 0) {
        // flatMap keeps the list of messages flat. Joining an array of arrays
        // would stringify every inner array with "," between its messages,
        // printing stray commas for models that have more than one error.
        const errors = failedExplores.flatMap((failedExplore) =>
            failedExplore.errors.map(
                (error) => `- ${failedExplore.name}: ${error.message}\n`,
            ),
        );
        console.error(
            styles.warning(
                `Found ${
                    failedExplores.length
                } errors when validating dbt models:\n${errors.join('')}`,
            ),
        );
    }

    // Skipping assumes yml has the field types.
    let catalog: WarehouseCatalog = {};
    if (!options.skipWarehouseCatalog) {
        GlobalState.debug('> Fetching warehouse catalog');
        catalog = await warehouseClient.getCatalog(
            getSchemaStructureFromDbtModels(validModels),
        );
    } else {
        GlobalState.debug('> Skipping warehouse catalog');
    }

    const validModelsWithTypes = attachTypesToModels(
        validModels,
        catalog,
        false,
    );

    // NOTE(review): the adapter is only checked here, after validation and
    // the catalog fetch have already used it — confirm the order is intended.
    if (!isSupportedDbtAdapter(manifest.metadata)) {
        await LightdashAnalytics.track({
            event: 'compile.error',
            properties: {
                executionId,
                dbtVersion,
                error: `Dbt adapter ${manifest.metadata.adapter_type} is not supported`,
            },
        });
        throw new ParseError(
            `Dbt adapter ${manifest.metadata.adapter_type} is not supported`,
        );
    }

    GlobalState.debug(
        `> Converting explores with adapter: ${manifest.metadata.adapter_type}`,
    );
    // For manifest versions V10 and V11 no dbt metrics are passed on.
    const validExplores = await convertExplores(
        validModelsWithTypes,
        false,
        manifest.metadata.adapter_type,
        [DbtManifestVersion.V10, DbtManifestVersion.V11].includes(
            manifestVersion,
        )
            ? []
            : Object.values(manifest.metrics),
        warehouseClient,
    );
    console.error('');

    const explores = [...validExplores, ...failedExplores];

    // Per-explore status line, with the joined error messages on failures.
    explores.forEach((e) => {
        const status = isExploreError(e)
            ? styles.error('ERROR')
            : styles.success('SUCCESS');
        const errors = isExploreError(e)
            ? `: ${styles.error(e.errors.map((err) => err.message).join(', '))}`
            : '';
        console.error(`- ${status}> ${e.name} ${errors}`);
    });
    console.error('');
    // Here `errors` counts explores in error state, not individual messages.
    const errors = explores.filter((e) => isExploreError(e)).length;
    console.error(
        `Compiled ${explores.length} explores, SUCCESS=${
            explores.length - errors
        } ERRORS=${errors}`,
    );

    await LightdashAnalytics.track({
        event: 'compile.completed',
        properties: {
            executionId,
            explores: explores.length,
            errors,
            dbtMetrics: Object.values(manifest.metrics).length,
            dbtVersion,
        },
    });
    return explores;
};
deploy 方法处理

/**
 * Uploads the given explores to a Lightdash project.
 *
 * When some explores are in error state the process exits with code 1,
 * unless `options.ignoreErrors` is set, in which case a warning is printed
 * and the upload proceeds.
 *
 * @param explores - Compiled explores, possibly including failed ones.
 * @param options - Deploy arguments; `projectUuid` selects the project and
 *   `ignoreErrors` allows deploying despite errors.
 */
export const deploy = async (
    explores: (Explore | ExploreError)[],
    options: DeployArgs,
): Promise<void> => {
    let errors = 0;
    for (const explore of explores) {
        if (isExploreError(explore)) {
            errors += 1;
        }
    }

    if (errors > 0) {
        if (!options.ignoreErrors) {
            console.error(
                styles.error(
                    `Can't deploy with errors. If you still want to deploy, add ${styles.bold(
                        '--ignore-errors',
                    )} flag`,
                ),
            );
            process.exit(1);
        } else {
            console.error(
                styles.warning(`\nDeploying project with ${errors} errors\n`),
            );
        }
    }

    // Write the explores to the project with a PUT request.
    const url = `/api/v1/projects/${options.projectUuid}/explores`;
    await lightdashApi<null>({
        method: 'PUT',
        url,
        body: JSON.stringify(explores),
    });

    await LightdashAnalytics.track({
        event: 'deploy.triggered',
        properties: {
            projectId: options.projectUuid,
        },
    });
};
Explore 类型定义

/** Shape of an explore as produced by compilation and sent on deploy. */
export type Explore = {
    /** Must be sql friendly (a-Z, 0-9, _). */
    name: string;
    /** Friendly name. */
    label: string;
    tags: string[];
    groupLabel?: string;
    /** Must match a tableName in `tables`. */
    baseTable: string;
    /** Must match a tableName in `tables`. */
    joinedTables: CompiledExploreJoin[];
    /** All tables in this explore, keyed by tableName. */
    tables: Record<string, CompiledTable>;
    /** Type of target database e.g. postgres/redshift/bigquery/snowflake/databricks. */
    targetDatabase: SupportedDbtAdapter;
    warehouse?: string;
    ymlPath?: string;
    sqlPath?: string;
};

说明

lightdash 对于 dbt 的处理，核心还是利用了 dbt 的 cli 命令，然后自己解析，之后通过接口将数据写入 lightdash backend 服务中，整体上比较依赖 manifest；同时部分解析使用了 cli 的 json 格式输出。以上只是关于 dbt 集成部分的简单说明，实际上还有 bi 分析部分，后边再说明

参考资料

packages/cli/src/handlers/dbt/run.ts
packages/cli/src/handlers/deploy.ts
packages/cli/src/handlers/compile.ts
packages/common/src/compiler/translator.ts
https://docs.lightdash.com/get-started/setup-lightdash/get-project-lightdash-ready