Java 调用 Kettle API 实现数据导入 Demo
开始执行kettle的转换生成.ktr文件
依赖
package com.kettle.demo.kettle;
import net.sf.jsqlparser.statement.insert.InsertModifierPriority;
import org.apache.commons.io.FileUtils;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.plugins.PluginRegistry;
import org.pentaho.di.core.plugins.StepPluginType;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransHopMeta;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.steps.insertupdate.InsertUpdateMeta;
import org.pentaho.di.trans.steps.tableinput.TableInputMeta;
import java.io.File;
import java.util.Arrays;
/**
* @param
* @author zcs
* @date 2020/08/07
* @description
* @return
*/
public class TransDemo {
public static TransDemo transDemo;
/**
* 两个库中的表名
*/
public static String bjdt_tablename = "kettle_ceshi";
public static String kettle_tablename = "kettle_demo";
/**
* 数据库连接信息,适用于DatabaseMeta其中 一个构造器DatabaseMeta(String xml)
*/
public static final String[] databasesXML = {
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
"<connection>" +
"<name>bjdt</name>" +
"<server>localhost</server>" +
"<type>Mysql</type>" +
"<access>Native</access>" +
"<database>ceshi</database>" +
"<port>3306</port>" +
"<username>root</username>" +
"<password>123456</password>" +
"</connection>",
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
"<connection>" +
"<name>kettle</name>" +
"<server>ip地址</server>" +
"<type>Mysql</type>" +
"<access>Native</access>" +
"<database>数据库名称</database>" +
"<port>端口号</port>" +
"<username>用户名</username>" +
"<password>密码</password>" +
"</connection>"
};
/**
* 生成一个转化,把一个数据库中的数据转移到另一个数据库中,只有两个步骤,第一个是表输入,第二个是表插入与更新操作
* @return
* @throws KettleXMLException
*
* "SELECT name,password FROM "+bjdt_tablename +" limit 0 ,10";
*/
public TransMeta generateMyOwnTrans(String sql,String sourceName,String targetName,String [] keyLookup,String [] keyStream,String [] keyStream2,String [] keyCondition,String [] upDateLookup,String [] updateStream,String targentTableName,Boolean [] updateOrNot) throws KettleXMLException, KettleDatabaseException {
System.out.println("==========开始执行==========");
TransMeta transMeta = new TransMeta();
//设置转化的名称
transMeta.setName("insert_update");
//添加转换的数据库连接
for (int i=0;i<databasesXML.length;i++){
DatabaseMeta databaseMeta = new DatabaseMeta(databasesXML[i]);
transMeta.addDatabase(databaseMeta);
}
//registry是给每个步骤生成一个标识Id用
PluginRegistry registry = PluginRegistry.getInstance();
//第一个表输入步骤(TableInputMeta)
TableInputMeta tableInput = new TableInputMeta();
String tableInputPluginId = registry.getPluginId(StepPluginType.class, tableInput);
//给表输入添加一个DatabaseMeta连接数据库
DatabaseMeta database_bjdt = transMeta.findDatabase(sourceName);
tableInput.setDatabaseMeta(database_bjdt);
String select_sql = sql;
tableInput.setSQL(select_sql);
//添加TableInputMeta到转换中
StepMeta tableInputMetaStep = new StepMeta(tableInputPluginId,"table input",tableInput);
//给步骤添加在spoon工具中的显示位置
tableInputMetaStep.setDraw(true);
tableInputMetaStep.setLocation(100, 100);
transMeta.addStep(tableInputMetaStep);
//第二个步骤插入与更新
InsertUpdateMeta insertUpdateMeta = new InsertUpdateMeta();
String insertUpdateMetaPluginId = registry.getPluginId(StepPluginType.class,insertUpdateMeta);
//添加数据库连接
DatabaseMeta database_kettle = transMeta.findDatabase(targetName);
insertUpdateMeta.setDatabaseMeta(database_kettle);
//设置操作的表
insertUpdateMeta.setTableName(targentTableName);
//设置用来查询的关键字
//setKeyLookup 你想要对比的字段
// insertUpdateMeta.setKeyLookup(new String[]{"",""});
//源数据的字段 如果数据相同的话数据就不会新增
// insertUpdateMeta.setKeyStream(new String[]{"name","password"});
// insertUpdateMeta.setKeyStream2(new String[]{"",""});//需要导入几个字段加几个 ""
// insertUpdateMeta.setKeyCondition(new String[]{"=","="});
insertUpdateMeta.setKeyLookup(keyLookup);
insertUpdateMeta.setKeyStream(keyStream);
insertUpdateMeta.setKeyStream2(keyStream2);
insertUpdateMeta.setKeyCondition(keyCondition);
//设置要更新的字段
// updatelookup 你想要将数据放入的字段
// String[] upDateLookup = {"sex","remark"} ;
// updateStream 源数据查询的字段
// String [] updateStream = {"name","password"};
// Boolean[] updateOrNot = {false,false};
insertUpdateMeta.setUpdateLookup(upDateLookup);
insertUpdateMeta.setUpdateStream(updateStream);
insertUpdateMeta.setUpdate(updateOrNot);
String[] lookup = insertUpdateMeta.getUpdateLookup();
//添加步骤到转换中
StepMeta insertUpdateStep = new StepMeta(insertUpdateMetaPluginId,"insert_update",insertUpdateMeta);
insertUpdateStep.setDraw(true);
insertUpdateStep.setLocation(250,100);
transMeta.addStep(insertUpdateStep);
//******************************************************************
//******************************************************************
//添加hop把两个步骤关联起来
transMeta.addTransHop(new TransHopMeta(tableInputMetaStep, insertUpdateStep));
System.out.println("***********the end************");
return transMeta;
}
//使用main方法调用
public static void main(String[] args) {
try {
//源字段
String [] sourceField = {"name","password","kettle_age","kettle_sex"};
//目标字段
String [] targetField = {"name","password","age","sex"};
// 表名
String sourceTableName = "kettle_ceshi";
String targentTableName = "kettle_demo";
//从第几条数据开始查
String currentNumber = "0";
//往后查多少条 原意是在查询表信息的时候直接获取放到redis里然后这里取
String subsequentNumber = "10";
// 配置文件中 name 标签的值
String sourceName = "bjdt";
String targetName = "kettle";
StringBuffer stringBuffer = new StringBuffer();
stringBuffer.append("select ");
//setKeyLookup 你想要对比的字段
System.out.println(sourceField.length);
String [] keyLookup = new String[sourceField.length];
Boolean [] updateOrNot = new Boolean[sourceField.length];
//有几个字段写几个 "="
String [] keyCondition = new String[sourceField.length];
for (int i = 0 ;i<sourceField.length; i++){
updateOrNot[i]=false;
keyLookup[i]="\"\"";
keyCondition[i]="\"=\"";
stringBuffer.append(sourceField[i]).append(",");
}
StringBuffer sql = stringBuffer.deleteCharAt(stringBuffer.length() - 1);
sql.append(" from ").append(sourceTableName).append(" limit ").append(currentNumber).append(",").append(subsequentNumber);
//源数据的字段 如果数据相同的话数据就不会新增
String [] keyStream = sourceField;
//有几个字段写几个 ""
String [] keyStream2 = keyLookup;
// updatelookup 你想要将数据放入的字段
String [] upDateLookup = targetField;
// updateStream 源数据查询的字段
String [] updateStream = sourceField;
System.out.println("sql:"+sql);
System.out.println("sourceName:"+sourceName);
System.out.println("targetName:"+targetName);
System.out.println("targentTableName:"+targentTableName);
System.out.println("keyLookup:"+Arrays.toString(keyLookup));
System.out.println("keyStream:"+Arrays.toString(keyStream));
System.out.println("keyStream2:"+Arrays.toString(keyStream2));
System.out.println("keyCondition:"+Arrays.toString(keyCondition));
System.out.println("upDateLookup:"+Arrays.toString(upDateLookup));
System.out.println("updateStream:"+Arrays.toString(updateStream));
System.out.println("updateOrNot:"+Arrays.toString(updateOrNot));
KettleEnvironment.init();
transDemo = new TransDemo();
/**
* String sql,
* String sourceName,
* String targetName,
* String [] keyLookup,
* String [] keyStream,
* String [] keyStream2,
* String [] keyCondition,
* String [] upDateLookup,
* String [] updateStream,
* String targentTableName
* */
TransMeta transMeta = transDemo.generateMyOwnTrans(sql.toString(),sourceName,targetName,keyLookup,keyStream,keyStream2,keyCondition,upDateLookup,updateStream,targentTableName,updateOrNot);
String transXml = transMeta.getXML();
String transName = "etl/update_insert_Trans.ktr";
File file = new File(transName);
FileUtils.writeStringToFile(file, transXml, "UTF-8");
System.out.println(databasesXML.length+"\n"+databasesXML[0]+"\n"+databasesXML[1]);
} catch (Exception e) {
e.printStackTrace();
return;
}
}
}
执行之前生成好的.ktr文件
package com.kettle.demo.kettle;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.ChannelLogTable;
import org.pentaho.di.core.logging.KettleLogStore;
import org.pentaho.di.core.logging.StepLogTable;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.variables.Variables;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import java.util.List;
/**
* @param
* @author zcs
* @date 2020/08/07
* @description
* @return
*/
public class RunTransDemo {
/* 这块是配合搞日志弄得 暂时没配置到表内
public static final String[] databasesXML = {
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
"<connection>" +
"<name>kettle_log</name>" +
"<server>39.98.226.242</server>" +
"<type>Mysql</type>" +
"<access>Native</access>" +
"<database>clesun_security</database>" +
"<port>2020</port>" +
"<username>root</username>" +
"<password>QsNyt@2007</password>" +
"</connection>"
};*/
public static void main(String[] args) throws KettleException {
insertUpdate();
// dbLog();
}
public static void insertUpdate() throws KettleException {
//初始化
KettleEnvironment.init();
//创建转换元数据对象 执行之前生成好的.ktr文件
TransMeta meta = new TransMeta("etl/update_insert_Trans.ktr");
Trans trans = new Trans(meta);
trans.prepareExecution(null);
trans.startThreads();
trans.waitUntilFinished();
List<RowMetaAndData> resultRows = trans.getResultRows();
for (RowMetaAndData rowMetaAndData:resultRows ){
System.out.println("===="+rowMetaAndData);
}
String strLog = KettleLogStore.getAppender().getBuffer().toString();
System.out.println("==========开始打印日志==========");
System.out.println(KettleLogStore.getAppender().getBuffer().toString());
System.out.println("==========日志打印结束==========");
System.out.println("getLastProcessed:"+trans.getLastProcessed());
String substring = strLog.substring(strLog.lastIndexOf("I=") + 1);
String successCount = substring.substring(substring.lastIndexOf("W=") + 2,substring.lastIndexOf("W=") + 3);
System.out.println("成功数:"+successCount);
System.out.println("errors:"+trans.getErrors());
if(trans.getErrors()!=0){
System.out.println("执行失败!");
}
}
//按网上的方法说可以将日志配置到数据库内,没搞出来先注释了 = =
/*
public static void dbLog() throws KettleException {
KettleEnvironment.init();
TransMeta transMeta = new TransMeta("etl/update_insert_Trans.ktr");
Trans trans = new Trans(transMeta);
for (int i=0;i<databasesXML.length;i++){
DatabaseMeta databaseMeta = new DatabaseMeta(databasesXML[i]);
transMeta.addDatabase(databaseMeta);
}
VariableSpace space = new Variables();
//将step日志数据库配置名加入到变量集中
space.setVariable("kettle_log","logdb");
space.initializeVariablesFrom(null);
ChannelLogTable channelLogTable = ChannelLogTable.getDefault(space, transMeta);
channelLogTable.setConnectionName("logdb");
channelLogTable.setTableName("kettle_data_log");
transMeta.setChannelLogTable(channelLogTable);
trans.execute(null);
trans.waitUntilFinished();
if (trans.getErrors() > 0) {
throw new RuntimeException(
"There were errors during transformation execution.");
}
}*/
}