一、环境初始化

/**
	 * Initializes the Kettle environment exactly once: points the JNDI lookup
	 * at the bundled {@code kettle/} resource directory, derives and sets
	 * {@code KETTLE_HOME}, and registers Kettle's core plugins.
	 *
	 * Errors are logged (with stack trace) rather than propagated; a failed
	 * init leaves Kettle uninitialized for later retries.
	 */
	private static void initKettleEnvironment() {
		
		if (KettleEnvironment.isInitialized()) {
			return;
		}
		// Remember the caller's working directory so it can always be restored.
		String userDir = System.getProperty("user.dir");
		try {
			// Point Kettle's simple-JNDI lookup at the "kettle/" classpath directory.
			ClassPathResource jndiConResource = new ClassPathResource("kettle/");
			Const.JNDI_DIRECTORY = jndiConResource.getFile().getCanonicalPath();
			
			String userHome = System.getProperty("user.home");
			logger.info("user.dir:{}, user.home:{}", userDir, userHome);
			
			// Kettle resolves its home from user.dir/KETTLE_HOME, so temporarily
			// point both at the classpath root (minus the trailing "classes" segment).
			ClassPathResource cpr = new ClassPathResource("/");
			String classPath = cpr.getFile().getCanonicalPath();
			String kettleHome = classPath.replace(File.separatorChar + "classes", "");
			logger.info("KETTLE_HOME:{}", kettleHome);
			System.setProperty("user.dir", kettleHome);
			System.setProperty("KETTLE_HOME", kettleHome);
			// true => also initialize JNDI during environment init.
			KettleEnvironment.init(true);
			EnvUtil.environmentInit();
		} catch (Exception e) {
			// Keep the full stack trace in the log instead of stdout + getMessage().
			logger.error("--kettle--初始化环境失败:{}", e.getMessage(), e);
		} finally {
			// Restore the caller's working directory even when init fails;
			// the original only restored it on the success path.
			System.setProperty("user.dir", userDir);
		}
	}

二、执行kettle文件

/**
	 * Runs a Kettle transformation (.ktr) resource file from the classpath.
	 * 
	 * @param ktrPath classpath-relative path of the .ktr resource
	 * @param variableMap variables to set on the transformation; values set from
	 *                    Java override variables defined inside the .ktr file
	 * @return true if the transformation ran without errors and reported success
	 * @throws Exception if the resource cannot be resolved or the transformation fails to load
	 */
	public static boolean runKettleKtr(String ktrPath, Map<String, Object> variableMap) throws Exception {
		
		initKettleEnvironment();
		// Resolve the classpath resource to an absolute filesystem path.
		ClassPathResource configResource = new ClassPathResource(ktrPath);
		File ktrFile = configResource.getFile();
		String absolutePath = ktrFile.getAbsolutePath();
		TransMeta transMeta = new TransMeta(absolutePath);
		
		// Transformation instance built from the parsed metadata.
		Trans trans = new Trans(transMeta);
		// Variables set from Java override those defined inside the Kettle file.
		if (null != variableMap) {
			for (Map.Entry<String, Object> entry : variableMap.entrySet()) {
				Object value = entry.getValue();
				// Convert via toString() instead of an unchecked (String) cast,
				// which threw ClassCastException for non-String values.
				trans.setVariable(entry.getKey(), value == null ? null : value.toString());
			}
		}
		// Start the transformation and block until it completes.
		trans.execute(null);
		trans.waitUntilFinished();
		if (trans.getErrors() != 0) {
			return false;
		}
		return trans.getResult().getResult();
	}

	/**
	 * Runs a Kettle job (.kjb) resource file from the classpath.
	 *
	 * @param kjbPath classpath-relative path of the .kjb resource
	 * @param variableMap variables passed to the job; the job script reads them as ${name}
	 * @return true if the job ran without errors and reported success
	 * @throws Exception if the resource cannot be resolved or the job fails to load
	 */
	public static boolean runKettleKjb(String kjbPath, Map<String, Object> variableMap) throws Exception {
		
		initKettleEnvironment();
		ClassPathResource configResource = new ClassPathResource(kjbPath);
		File kjbFile = configResource.getFile();
		String absolutePath = kjbFile.getAbsolutePath();
		// The absolute path is the location and name of the job script.
		JobMeta jobMeta = new JobMeta(absolutePath, null);
		Job job = new Job(null, jobMeta);
		// Pass variables to the job; the script reads them as ${variableName}.
		if (null != variableMap) {
			for (Map.Entry<String, Object> entry : variableMap.entrySet()) {
				Object value = entry.getValue();
				// Convert via toString() instead of an unchecked (String) cast,
				// which threw ClassCastException for non-String values.
				job.setVariable(entry.getKey(), value == null ? null : value.toString());
			}
		}
		job.start();
		job.waitUntilFinished();
		if (job.getErrors() > 0) {
			// Fail fast on errors, consistent with runKettleKtr; the original
			// printed "decompress fail!" to stdout yet still returned the result.
			logger.error("Kettle job finished with {} error(s): {}", job.getErrors(), kjbPath);
			return false;
		}
		return job.getResult().getResult();
	}

kettle执行日志获取方式

  • kettle日志有多种,本文中获取的即kettle控制台的信息
  • 各种日志信息可根据kettle操作界面中的日志设置详细了解
    注:以转换做示例,job类似,只贴出了主要的获取日志代码

一、获取总日志(执行完)

// Transformation instance built from previously parsed metadata.
		Trans trans = new Trans(transMeta);
		trans.setLogLevel(LogLevel.BASIC);// capture at BASIC verbosity
		// After the run, fetch the buffered log text for this transformation's
		// logging channel from Kettle's central log store.
		String logChannelId = trans.getLogChannelId();
		LoggingBuffer appender = KettleLogStore.getAppender();
		String logText = appender.getBuffer(logChannelId, true).toString();
		System.out.println("记录日志:" + logText);

二、日志监听获取日志(执行中)

SimpleDateFormat sdf= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		// Listener that receives each log line Kettle emits while running.
		// NOTE(review): SimpleDateFormat is not thread-safe — fine here because
		// eventAdded formats on whichever thread delivers the event one at a time,
		// but confirm if events can arrive concurrently.
		KettleLoggingEventListener kettleLogEL = new KettleLoggingEventListener() {
			@Override
			public void eventAdded(KettleLoggingEvent logs) {
				// logs carries one runtime log message from Kettle.
				logger.debug("日志监听--kettle运行时日志:" + sdf.format(new Date(logs.getTimeStamp())) + " - "+logs.getMessage());
			}
		};
		// Register the listener ONCE. The original registered it twice, so every
		// log event was delivered (and logged) twice.
		KettleLogStore.getAppender().addLoggingEventListener(kettleLogEL);
		// Run the transformation and wait for it to finish.
		trans.execute(null);
		trans.waitUntilFinished();

		// clear() only empties the buffered log lines — it does not detach
		// listeners — so the listener must also be removed explicitly.
		KettleLogStore.getAppender().clear();
		KettleLogStore.getAppender().removeLoggingEventListener(kettleLogEL);

三、执行日志插入数据表(执行中)
注:以job做示例,只贴出了主要的获取日志代码
日志表的创建及字段获取:

  • 1、可根据kettle操作界面中的日志设置具体查看
  • 2、可通过查看保存的kettle执行文件,以文本方式打开查看
// jobname is the path and file name of the Job script.
		JobMeta jobMeta = new JobMeta(jobname , null);
		VariableSpace space = new Variables();
		// Expose the log-database connection name as a variable;
		// "portrait_app" is the name of the JNDI connection.
		space.setVariable("kettle_log", "portrait_app");
		space.initializeVariablesFrom(null);
		JobLogTable jobLogTable = JobLogTable.getDefault(space, jobMeta);
		// Database connection name the job log table writes through (set above).
		jobLogTable.setConnectionName("portrait_app");
		// Destination table for the job log records.
		jobLogTable.setTableName("kettle_job_log");
		jobMeta.setJobLogTable(jobLogTable);

附个人建的表结构

-- Kettle job-execution log table. Columns mirror the fields Kettle's
-- JobLogTable writes (see the job's "Log" settings in Spoon).
CREATE TABLE `kettle_job_log` (
  `id_job` bigint(20) NOT NULL COMMENT 'the batch id. it is a unique number, increased by one for each run of a job',
  `channel_id` varchar(80) DEFAULT NULL COMMENT 'the logging channel id (guid), can be matched to the logging lineage information',
  `jobname` varchar(100) DEFAULT NULL COMMENT 'the name of the job',
  `status` varchar(50) DEFAULT NULL COMMENT 'the status of the job : start, end, stopped, running',
  `lines_read` int(11) DEFAULT NULL COMMENT 'the number of lines read by the last job entry (transformation)',
  `lines_written` int(11) DEFAULT NULL COMMENT 'the number of lines written by the last job entry (transformation)',
  `lines_updated` int(11) DEFAULT NULL COMMENT 'the number of update statements executed by the last job entry (transformation)',
  `lines_input` int(11) DEFAULT NULL COMMENT 'the number of lines read from disk or the network by the last job entry (transformation). this is input from files, databases, etc',
  `lines_output` int(11) DEFAULT NULL COMMENT 'the number of lines written to disk or the network by the last job entry (transformation). this is input to files, databases, etc',
  `lines_rejected` int(11) DEFAULT NULL COMMENT 'the number of lines rejected with error handling by the last job entry (transformation)',
  `errors` int(11) DEFAULT NULL COMMENT 'the number of errors that occurred',
  `startdate` datetime DEFAULT NULL COMMENT 'the start of the date range for incremental (cdc) data processing. it is the end of date range of the last time this job ran correctly',
  `enddate` datetime DEFAULT NULL COMMENT 'the end of the date range for incremental (cdc) data processing',
  `logdate` datetime DEFAULT NULL COMMENT 'the update time of this log record.  if the job has status end it is the end of the job',
  `depdate` datetime DEFAULT NULL COMMENT 'the dependency date : the maximum date calculated by the dependency rules in the job settings',
  `replaydate` datetime DEFAULT NULL COMMENT 'the replay date is synonym for the start time of the job',
  `log_field` text COMMENT 'the field that will contain the complete text log of the job run.  usually this is a clob or (long) text type of field',
  `executing_server` varchar(50) DEFAULT NULL COMMENT 'the server that executed this job',
  `executing_user` varchar(50) DEFAULT NULL COMMENT 'the user that executed this job. this is the repository user if available or the os user otherwise',
  `start_job_entry` varchar(50) DEFAULT NULL COMMENT 'the name of the job entry where this job started',
  `client` varchar(50) DEFAULT NULL COMMENT 'the client which executed the job: spoon, pan, kitchen, carte',
  PRIMARY KEY (`id_job`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC COMMENT='kettle的job执行日志';