通过kettle开发工具spoon打开的编辑页面之后,默认是在当前用户下文件夹内创建一个.kettle的文件夹并加入kettle.properties配置文件
注:修改kettle.properties配置文件之后需要重启kettle程序才会重新生效
注意:kettle.properties为全局参数配置文件,启动Spoon时会自动读取本文件中的全部内容至内存中,所以如果在kettle运行过程中手动修改了这个配置文件,则需重启Spoon才会生效。
首先需要在指定文件夹中建好所需要的日志表,接下来就在kettle自动生成中的配置文件kettle.properties中配置每个日志的数据库及表名,这里数据库我选择用的共享的(共享的数据库连接存放在shared.xml文件中),kettle就能读的到
# trans日志(转换)
KETTLE_TRANS_LOG_DB=10.15.30.168
KETTLE_TRANS_LOG_TABLE=r_log_trans
# 步骤日志表 (步骤)
KETTLE_STEP_LOG_DB=10.15.30.168
KETTLE_STEP_LOG_TABLE=r_log_trans_step
# trans性能日志(运行)
KETTLE_TRANS_PERFORMANCE_LOG_DB=10.15.30.168
KETTLE_TRANS_PERFORMANCE_LOG_TABLE=r_log_trans_step
# 通道日志表(日志通道)
KETTLE_CHANNEL_LOG_DB=10.15.30.168
KETTLE_CHANNEL_LOG_TABLE=r_log_channel
# job和trans的Metrics(度量)日志表
KETTLE_METRICS_LOG_DB=10.15.30.168
KETTLE_METRICS_LOG_TABLE=r_log_trans_metrics
# job日志
KETTLE_JOB_LOG_DB=10.15.30.168
KETTLE_JOB_LOG_TABLE=r_log_job
# 作业项日志表
KETTLE_JOBENTRY_LOG_DB=10.15.30.168
KETTLE_JOBENTRY_LOG_TABLE=r_log_job_step
以下是kettle的建表SQL及部分表注释
-- 转换日志表
-- KETTLE_TRANS_LOG_TABLE
CREATE TABLE r_log_trans
(
ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, TRANSNAME VARCHAR(255)
, STATUS VARCHAR(15)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, STARTDATE DATETIME
, ENDDATE DATETIME
, LOGDATE DATETIME
, DEPDATE DATETIME
, REPLAYDATE DATETIME
, LOG_FIELD LONGTEXT
);
CREATE INDEX IDX_r_log_trans_1 ON r_log_trans(ID_BATCH);
CREATE INDEX IDX_r_log_trans_2 ON r_log_trans(ERRORS, STATUS, TRANSNAME);
-- 步骤日志表
-- KETTLE_STEP_LOG_TABLE
CREATE TABLE r_log_trans_step
(
ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, LOG_DATE DATETIME
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, STEP_COPY INT
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
);
-- 运行日志表
-- KETTLE_TRANS_PERFORMANCE_LOG_TABLE
CREATE TABLE r_log_trans_running
(
ID_BATCH INT
, SEQ_NR INT
, LOGDATE DATETIME
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, STEP_COPY INT
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, INPUT_BUFFER_ROWS BIGINT
, OUTPUT_BUFFER_ROWS BIGINT
);
-- 通道日志表(job、trans共用的)
-- KETTLE_CHANNEL_LOG_TABLE
CREATE TABLE r_log_channel
(
ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, LOG_DATE DATETIME
, LOGGING_OBJECT_TYPE VARCHAR(255)
, OBJECT_NAME VARCHAR(255)
, OBJECT_COPY VARCHAR(255)
, REPOSITORY_DIRECTORY VARCHAR(255)
, FILENAME VARCHAR(255)
, OBJECT_ID VARCHAR(255)
, OBJECT_REVISION VARCHAR(255)
, PARENT_CHANNEL_ID VARCHAR(255)
, ROOT_CHANNEL_ID VARCHAR(255)
);
-- Metrics log table
-- KETTLE_METRICS_LOG_TABLE
CREATE TABLE r_log_trans_metrics
(
ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, LOG_DATE DATETIME
, METRICS_DATE DATETIME
, METRICS_CODE VARCHAR(255)
, METRICS_DESCRIPTION VARCHAR(255)
, METRICS_SUBJECT VARCHAR(255)
, METRICS_TYPE VARCHAR(255)
, METRICS_VALUE BIGINT
);
-- 作业日志表
-- KETTLE_JOB_LOG_TABLE
CREATE TABLE r_log_job
(
ID_JOB INT
, CHANNEL_ID VARCHAR(255)
, JOBNAME VARCHAR(255)
, STATUS VARCHAR(15)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, STARTDATE DATETIME
, ENDDATE DATETIME
, LOGDATE DATETIME
, DEPDATE DATETIME
, REPLAYDATE DATETIME
, LOG_FIELD LONGTEXT
);
CREATE INDEX IDX_r_log_job_1 ON r_log_job(ID_JOB);
CREATE INDEX IDX_r_log_job_2 ON r_log_job(ERRORS, STATUS, JOBNAME);
-- 作业项日志表
--KETTLE_JOBENTRY_LOG_TABLE
CREATE TABLE r_log_job_step
(
ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, LOG_DATE DATETIME
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, RESULT BOOLEAN
, NR_RESULT_ROWS BIGINT
, NR_RESULT_FILES BIGINT
);
CREATE INDEX IDX_r_log_job_step_1 ON r_log_job_step(ID_BATCH);
-- 作业日志表:https://www.pudn.com/news/62a2d83c194b3b0e4344799b.html
comment on column T_KETTLE_JOB_LOG.id_job is '批次ID(即作业ID),自递增,主键';
comment on column T_KETTLE_JOB_LOG.channel_id is '日志通道ID(GUID),跟Logging channel log table有关联';
comment on column T_KETTLE_JOB_LOG.jobname is '作业名称';
comment on column T_KETTLE_JOB_LOG.status is '执行状态(start、end、stop、running)';
comment on column T_KETTLE_JOB_LOG.lines_read is '最后一个转换,读取的行数';
comment on column T_KETTLE_JOB_LOG.lines_written is '最后一个转换,写入的行数';
comment on column T_KETTLE_JOB_LOG.lines_updated is '最后一个转换,更新的行数';
comment on column T_KETTLE_JOB_LOG.lines_input is '最后一个转换,从存储或网络(如文件、数据库等)读取的行数';
comment on column T_KETTLE_JOB_LOG.lines_output is '最后一个转换,输出到存储或网络(如文件、数据库等)的行数';
comment on column T_KETTLE_JOB_LOG.lines_rejected is '最后一个转换,因错误处理导致拒绝的行数';
comment on column T_KETTLE_JOB_LOG.errors is '发生的错误数';
comment on column T_KETTLE_JOB_LOG.startdate is '开始执行时间(kettle的bug,始终是1900-01-01 7:00:00)';
comment on column T_KETTLE_JOB_LOG.enddate is '结束执行时间';
comment on column T_KETTLE_JOB_LOG.logdate is '最后记录日志的时间';
comment on column T_KETTLE_JOB_LOG.depdate is '依赖日期,作业设置中的依赖规则计算的最大日期。(不懂)';
comment on column T_KETTLE_JOB_LOG.replaydate is '重播日期,跟STARTDATE是同义词(不懂)';
comment on column T_KETTLE_JOB_LOG.log_field is '详细日志内容';
comment on column T_KETTLE_JOB_LOG.executing_server is '哪个服务器在执行当前作业(主机名)';
comment on column T_KETTLE_JOB_LOG.executing_user is '采用存储库方式,则是登录存储库的用户;否则为当前系统的登录用户';
comment on column T_KETTLE_JOB_LOG.start_job_entry is '当前作业,从哪个转换开始执行(为空代表是从Start开始)';
comment on column T_KETTLE_JOB_LOG.client is '客户端(SPOON、PAN、KITCHEN、CARTE)';
-- 作业日志通道表
comment on column T_KETTLE_JOB_CHANNEL_LOG.id_batch is '批次ID(即作业ID),与t_kettle_job_log的id_batch字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.channel_id is '日志通道ID(GUID),跟t_kettle_step_log的channel_id字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.log_date is '最后记录日志的时间';
comment on column T_KETTLE_JOB_CHANNEL_LOG.logging_object_type is '被记录对象的类型(如JOB、JOBENTRY、DATABASE、STEP、TRANS)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_name is '被记录对象的名称(跟LOGGING_OBJECT_TYPE是一对)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_copy is '被记录步骤对象的复制(不懂)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.repository_directory is '资源库(或存储)的目录,貌似是JOB、TRANS才有';
comment on column T_KETTLE_JOB_CHANNEL_LOG.filename is 'JOB、JOBENTRY、TRANS的路径(只有作业、转换才有)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_id is '当前对象ID';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_revision is '当前对象版本';
comment on column T_KETTLE_JOB_CHANNEL_LOG.parent_channel_id is '所属转换日志的日志通道ID,跟t_kettle_transformation_log的channel_id字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.root_channel_id is '所属作业日志的日志通道ID,跟t_kettle_job_log的channel_id字段有关联';
-- 转换日志表
comment on table T_KETTLE_TRANSFORMATION_LOG is 'KETTLE转换日志';
comment on column T_KETTLE_TRANSFORMATION_LOG.id_batch is '批次ID(即转换ID),自递增';
comment on column T_KETTLE_TRANSFORMATION_LOG.channel_id is '日志通道ID(GUID),跟Logging channel log table有关联';
comment on column T_KETTLE_TRANSFORMATION_LOG.transname is '转换名称';
comment on column T_KETTLE_TRANSFORMATION_LOG.status is '执行状态(start、end、stop、running)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_read is '特定步骤,读取的行数(没数据,没用的)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_written is '特定步骤,写入的行数(没数据,没用的)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_updated is '特定步骤,执行的更新语句的数量';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_input is '特定步骤,从存储或网络(如文件、数据库等)读取的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_output is '特定步骤,输出到存储或网络(如文件、数据库等)的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_rejected is '特定步骤,因错误处理导致拒绝的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.errors is '发生的错误数';
comment on column T_KETTLE_TRANSFORMATION_LOG.startdate is '开始执行时间(kettle的bug,有些是1900-01-01 7:00:00)';
comment on column T_KETTLE_TRANSFORMATION_LOG.enddate is '结束执行时间';
comment on column T_KETTLE_TRANSFORMATION_LOG.logdate is '最后记录日志的时间';
comment on column T_KETTLE_TRANSFORMATION_LOG.depdate is '依赖日期,作业设置中的依赖规则计算的最大日期。(不懂)';
comment on column T_KETTLE_TRANSFORMATION_LOG.replaydate is '重播日期,跟STARTDATE是同义词(不懂)';
comment on column T_KETTLE_TRANSFORMATION_LOG.log_field is '详细日志内容';
comment on column T_KETTLE_TRANSFORMATION_LOG.executing_server is '哪个服务器在执行当前作业(主机名)';
comment on column T_KETTLE_TRANSFORMATION_LOG.executing_user is '采用存储库方式,则是登录存储库的用户;否则为当前系统的登录用户';
comment on column T_KETTLE_TRANSFORMATION_LOG.client is '客户端(SPOON、PAN、KITCHEN、CARTE)';
-- 转换步骤日志表
comment on table T_KETTLE_STEP_LOG is 'KETTLE转换-步骤日志';
comment on column T_KETTLE_STEP_LOG.id_batch is '批次ID(转换ID),自递增,主键';
comment on column T_KETTLE_STEP_LOG.channel_id is '日志通道ID(GUID),跟t_kettle_job_channel_log的channel_id字段有关联';
comment on column T_KETTLE_STEP_LOG.transname is '转换名称';
comment on column T_KETTLE_STEP_LOG.stepname is '步骤名称';
comment on column T_KETTLE_STEP_LOG.step_copy is '当前步骤复制的数量';
comment on column T_KETTLE_STEP_LOG.lines_read is '从上一个步骤读取的数量';
comment on column T_KETTLE_STEP_LOG.lines_written is '输出到跟随的步骤(因为支持并发多个步骤)。假设输入1w条数据,且跟随两个步骤,那么LINES_WRITTEN是2w(1w * 2)';
comment on column T_KETTLE_STEP_LOG.lines_updated is '当前步骤,执行的更新语句的数量';
comment on column T_KETTLE_STEP_LOG.lines_input is '当前步骤,从来源(文件、数据库、网络等),读取的行数';
comment on column T_KETTLE_STEP_LOG.lines_output is '当前步骤,输出到输出端(文件、数据库、网络等)的行数';
comment on column T_KETTLE_STEP_LOG.lines_rejected is '当前步骤,因错误处理导致拒绝的行数';
comment on column T_KETTLE_STEP_LOG.errors is '当前步骤,发生的错误数';
comment on column T_KETTLE_STEP_LOG.log_field is '当前步骤,产生的详细日志内容';
comment on column T_KETTLE_STEP_LOG.log_date is '当前步骤,最后记录日志的时间';