1. Submitting a job to YARN in application mode from the Flink command line
You can run the following directly from the flink/bin directory:
flink run-application -t yarn-application ../examples/batch/WordCount.jar
Now let's take a look at the flink script itself:
target="$0"
# For the case, the executable has been directly symlinked, figure out
# the correct bin path by following its symlink up to an upper bound.
# Note: we can't use the readlink utility here if we want to be POSIX
# compatible.
iteration=0
while [ -L "$target" ]; do
if [ "$iteration" -gt 100 ]; then
echo "Cannot resolve path: You have a cyclic symlink in $target."
break
fi
ls=`ls -ld -- "$target"`
target=`expr "$ls" : '.* -> \(.*\)$'`
iteration=$((iteration + 1))
done
# Convert relative path to absolute path
bin=`dirname "$target"`
# get flink config
. "$bin"/config.sh
if [ "$FLINK_IDENT_STRING" = "" ]; then
FLINK_IDENT_STRING="$USER"
fi
CC_CLASSPATH=`constructFlinkClassPath`
log=$FLINK_LOG_DIR/flink-$FLINK_IDENT_STRING-client-$HOSTNAME.log
log_setting=(-Dlog.file="$log" -Dlog4j.configuration=file:"$FLINK_CONF_DIR"/log4j-cli.properties -Dlog4j.configurationFile=file:"$FLINK_CONF_DIR"/log4j-cli.properties -Dlogback.configurationFile=file:"$FLINK_CONF_DIR"/logback.xml)
# Add Client-specific JVM options
FLINK_ENV_JAVA_OPTS="${FLINK_ENV_JAVA_OPTS} ${FLINK_ENV_JAVA_OPTS_CLI}"
exec "${JAVA_RUN}" $JVM_ARGS $FLINK_ENV_JAVA_OPTS "${log_setting[@]}" -classpath "`manglePathList "$CC_CLASSPATH:$INTERNAL_HADOOP_CLASSPATHS"`" org.apache.flink.client.cli.CliFrontend "$@"
Printing out the final exec command (for example by temporarily replacing exec with echo) gives:
/opt/module/jdk1.8.0_212/bin/java -Dlog.file=/opt/module/flink-1.17.1/log/flink-xinyuan-client-hadoop102.log -Dlog4j.configuration=file:/opt/module/flink-1.17.1/conf/log4j-cli.properties -Dlog4j.configurationFile=file:/opt/module/flink-1.17.1/conf/log4j-cli.properties -Dlogback.configurationFile=file:/opt/module/flink-1.17.1/conf/logback.xml -classpath /opt/module/flink-1.17.1/lib/flink-cep-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-connector-files-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-csv-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-json-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-scala_2.12-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-table-api-java-uber-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-table-planner-loader-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-table-runtime-1.17.1.jar:/opt/module/flink-1.17.1/lib/log4j-1.2-api-2.17.1.jar:/opt/module/flink-1.17.1/lib/log4j-api-2.17.1.jar:/opt/module/flink-1.17.1/lib/log4j-core-2.17.1.jar:/opt/module/flink-1.17.1/lib/log4j-slf4j-impl-2.17.1.jar:/opt/module/flink-1.17.1/lib/paimon-flink-1.17-0.5-20230609.002253-36.jar:/opt/module/flink-1.17.1/lib/flink-dist-1.17.1.jar:/opt/module/hadoop-3.1.3/etc/hadoop:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/*:/opt/module/hadoop-3.1.3/share/hadoop/common/*:/opt/module/hadoop-3.1.3/share/hadoop/hdfs:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/*:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/*:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/lib/*:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/*:/opt/module/hadoop-3.1.3/share/hadoop/yarn:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/*:/opt/module/hadoop-3.1.3/share/hadoop/yarn/*:/opt/module/hadoop-3.1.3:/opt/module/hadoop-3.1.3/etc/hadoop::/opt/module/hbase/conf org.apache.flink.client.cli.CliFrontend run-application -t yarn-application ../examples/batch/WordCount.jar
The parameters are:
-Dlog.file
-Dlog4j.configuration
-Dlog4j.configurationFile
-Dlogback.configurationFile
-classpath
org.apache.flink.client.cli.CliFrontend run-application -t yarn-application ../examples/batch/WordCount.jar
2. The CliFrontend entry point
Let's look at org.apache.flink.client.cli.CliFrontend.
Its main method, in the flink-clients module, is the entry point.
/** Submits the job based on the arguments. */
public static void main(final String[] args) {
EnvironmentInformation.logEnvironmentInfo(LOG, "Command Line Client", args);
// 1. find the configuration directory
final String configurationDirectory = getConfigurationDirectoryFromEnv();
// 2. load the global configuration
final Configuration configuration =
GlobalConfiguration.loadConfiguration(configurationDirectory);
// 3. load the custom command lines
final List<CustomCommandLine> customCommandLines =
loadCustomCommandLines(configuration, configurationDirectory);
try {
final CliFrontend cli = new CliFrontend(configuration, customCommandLines);
SecurityUtils.install(new SecurityConfiguration(cli.configuration));
int retCode =
SecurityUtils.getInstalledContext().runSecured(() -> cli.parseAndRun(args));
System.exit(retCode);
} catch (Throwable t) {
final Throwable strippedThrowable =
ExceptionUtils.stripException(t, UndeclaredThrowableException.class);
LOG.error("Fatal error while running command line interface.", strippedThrowable);
strippedThrowable.printStackTrace();
System.exit(31);
}
}
Let's go through it step by step.
step0: log the environment information
EnvironmentInformation.logEnvironmentInfo(LOG, "Command Line Client", args);
The method itself:
public static void logEnvironmentInfo(
Logger log, String componentName, String[] commandLineArgs) {
if (log.isInfoEnabled()) {
RevisionInformation rev = getRevisionInformation();
String version = getVersion();
String scalaVersion = getScalaVersion();
String jvmVersion = getJvmVersion();
String[] options = getJvmStartupOptionsArray();
String javaHome = System.getenv("JAVA_HOME");
String inheritedLogs = System.getenv("FLINK_INHERITED_LOGS");
long maxHeapMegabytes = getMaxJvmHeapMemory() >>> 20;
if (inheritedLogs != null) {
log.info(
"--------------------------------------------------------------------------------");
log.info(" Preconfiguration: ");
log.info(inheritedLogs);
}
log.info(
"--------------------------------------------------------------------------------");
log.info(
" Starting "
+ componentName
+ " (Version: "
+ version
+ ", Scala: "
+ scalaVersion
+ ", "
+ "Rev:"
+ rev.commitId
+ ", "
+ "Date:"
+ rev.commitDate
+ ")");
log.info(" OS current user: " + System.getProperty("user.name"));
log.info(" Current Hadoop/Kerberos user: " + getHadoopUser());
log.info(" JVM: " + jvmVersion);
log.info(" Maximum heap size: " + maxHeapMegabytes + " MiBytes");
log.info(" JAVA_HOME: " + (javaHome == null ? "(not set)" : javaHome));
String hadoopVersionString = getHadoopVersionString();
if (hadoopVersionString != null) {
log.info(" Hadoop version: " + hadoopVersionString);
} else {
log.info(" No Hadoop Dependency available");
}
if (options.length == 0) {
log.info(" JVM Options: (none)");
} else {
log.info(" JVM Options:");
for (String s : options) {
log.info(" " + s);
}
}
if (commandLineArgs == null || commandLineArgs.length == 0) {
log.info(" Program Arguments: (none)");
} else {
log.info(" Program Arguments:");
for (String s : commandLineArgs) {
if (GlobalConfiguration.isSensitive(s)) {
log.info(
" "
+ GlobalConfiguration.HIDDEN_CONTENT
+ " (sensitive information)");
} else {
log.info(" " + s);
}
}
}
log.info(" Classpath: " + System.getProperty("java.class.path"));
log.info(
"--------------------------------------------------------------------------------");
}
}
Nothing special here; it just logs the environment information. The concrete output is shown below,
between the two long dashed lines.
2023-06-29 21:22:16,271 INFO org.apache.flink.client.cli.CliFrontend [] - --------------------------------------------------------------------------------
2023-06-29 21:22:16,274 INFO org.apache.flink.client.cli.CliFrontend [] - Starting Command Line Client (Version: 1.17.1, Scala: 2.12, Rev:2750d5c, Date:2023-05-19T10:45:46+02:00)
2023-06-29 21:22:16,274 INFO org.apache.flink.client.cli.CliFrontend [] - OS current user: xinyuan
2023-06-29 21:22:16,447 INFO org.apache.flink.client.cli.CliFrontend [] - Current Hadoop/Kerberos user: xinyuan
2023-06-29 21:22:16,447 INFO org.apache.flink.client.cli.CliFrontend [] - JVM: Java HotSpot(TM) 64-Bit Server VM - Oracle Corporation - 1.8/25.212-b10
2023-06-29 21:22:16,447 INFO org.apache.flink.client.cli.CliFrontend [] - Arch: amd64
2023-06-29 21:22:16,447 INFO org.apache.flink.client.cli.CliFrontend [] - Maximum heap size: 1771 MiBytes
2023-06-29 21:22:16,447 INFO org.apache.flink.client.cli.CliFrontend [] - JAVA_HOME: /opt/module/jdk1.8.0_212
2023-06-29 21:22:16,449 INFO org.apache.flink.client.cli.CliFrontend [] - Hadoop version: 3.1.3
2023-06-29 21:22:16,449 INFO org.apache.flink.client.cli.CliFrontend [] - JVM Options:
2023-06-29 21:22:16,449 INFO org.apache.flink.client.cli.CliFrontend [] - -Dlog.file=/opt/module/flink-1.17.1/log/flink-xinyuan-client-hadoop102.log
2023-06-29 21:22:16,449 INFO org.apache.flink.client.cli.CliFrontend [] - -Dlog4j.configuration=file:/opt/module/flink-1.17.1/conf/log4j-cli.properties
2023-06-29 21:22:16,449 INFO org.apache.flink.client.cli.CliFrontend [] - -Dlog4j.configurationFile=file:/opt/module/flink-1.17.1/conf/log4j-cli.properties
2023-06-29 21:22:16,449 INFO org.apache.flink.client.cli.CliFrontend [] - -Dlogback.configurationFile=file:/opt/module/flink-1.17.1/conf/logback.xml
2023-06-29 21:22:16,449 INFO org.apache.flink.client.cli.CliFrontend [] - Program Arguments:
2023-06-29 21:22:16,450 INFO org.apache.flink.client.cli.CliFrontend [] - run-application
2023-06-29 21:22:16,450 INFO org.apache.flink.client.cli.CliFrontend [] - -t
2023-06-29 21:22:16,450 INFO org.apache.flink.client.cli.CliFrontend [] - yarn-application
2023-06-29 21:22:16,450 INFO org.apache.flink.client.cli.CliFrontend [] - ../examples/batch/WordCount.jar
2023-06-29 21:22:16,450 INFO org.apache.flink.client.cli.CliFrontend [] - Classpath: /opt/module/flink-1.17.1/lib/flink-cep-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-connector-files-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-csv-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-json-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-scala_2.12-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-table-api-java-uber-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-table-planner-loader-1.17.1.jar:/opt/module/flink-1.17.1/lib/flink-table-runtime-1.17.1.jar:/opt/module/flink-1.17.1/lib/log4j-1.2-api-2.17.1.jar:/opt/module/flink-1.17.1/lib/log4j-api-2.17.1.jar:/opt/module/flink-1.17.1/lib/log4j-core-2.17.1.jar:/opt/module/flink-1.17.1/lib/log4j-slf4j-impl-2.17.1.jar:/opt/module/flink-1.17.1/lib/paimon-flink-1.17-0.5-20230609.002253-36.jar:/opt/module/flink-1.17.1/lib/flink-dist-1.17.1.jar:/opt/module/hadoop-3.1.3/etc/hadoop:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/accessors-smart-1.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/animal-sniffer-annotations-1.17.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/asm-5.0.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/audience-annotations-0.5.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/avro-1.7.7.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/checker-qual-2.5.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-beanutils-1.9.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-cli-1.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-codec-1.11.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-collections-3.2.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-compress-1.18.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-configuration2-2.1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-io-2.5.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-lang-2.6.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-lang3-3.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-logging-1.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-math3-3.1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/commons-net-3.6.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/curator-client-2.13.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/curator-framework-2.13.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/curator-recipes-2.13.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/error_prone_annotations-2.2.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/failureaccess-1.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/gson-2.2.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/guava-27.0-jre.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/hadoop-annotations-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/hadoop-auth-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/htrace-core4-4.1.0-incubating.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/httpclient-4.5.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/httpcore-4.4.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/j2objc-annotations-1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jackson-annotations-2.7.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jackson-core-2.7.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jackson-core-asl-1.9.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jackson-databind-2.7.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jac
kson-jaxrs-1.9.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jackson-mapper-asl-1.9.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jackson-xc-1.9.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/javax.servlet-api-3.1.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jaxb-api-2.2.11.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jcip-annotations-1.0-1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jersey-core-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jersey-json-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jersey-server-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jersey-servlet-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jettison-1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jetty-http-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jetty-io-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jetty-security-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jetty-server-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jetty-servlet-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jetty-util-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jetty-webapp-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jetty-xml-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jsch-0.1.54.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/json-smart-2.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jsp-api-2.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jsr305-3.0.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jsr311-api-1.1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-admin-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-client-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-common-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-core-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-crypto-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-identity-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-server-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-simplekdc-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerb-util-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerby-asn1-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerby-config-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerby-pkix-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerby-util-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/kerby-xdr-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/log4j-1.2.17.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/netty-3.10.5.Final.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/nimbus-jose-jwt-4.41.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/paranamer-2.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/protobuf-java-2.5.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/re2j-1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/slf4j-api-1.7.25.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/snappy-java-1.0.5.jar:/opt/module/hado
op-3.1.3/share/hadoop/common/lib/stax2-api-3.1.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/token-provider-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/woodstox-core-5.0.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/zookeeper-3.4.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/jul-to-slf4j-1.7.25.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/metrics-core-3.2.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-common-3.1.3-tests.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-common-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-nfs-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-kms-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-util-ajax-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/leveldbjni-all-1.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/netty-all-4.0.52.Final.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/okhttp-2.7.5.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/okio-1.6.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jersey-servlet-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jersey-json-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/hadoop-auth-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-codec-1.11.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/log4j-1.2.17.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/httpclient-4.5.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/httpcore-4.4.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-logging-1.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/nimbus-jose-jwt-4.41.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jcip-annotations-1.0-1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/json-smart-2.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/accessors-smart-1.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/asm-5.0.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/zookeeper-3.4.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/audience-annotations-0.5.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/netty-3.10.5.Final.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/curator-framework-2.13.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/curator-client-2.13.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/guava-27.0-jre.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/failureaccess-1.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jsr305-3.0.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/checker-qual-2.5.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/error_prone_annotations-2.2.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/j2objc-annotations-1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/animal-sniffer-annotations-1.17.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerb-simplekdc-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerb-client-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerby-config-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerb-core-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerby-pkix-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerby-asn1-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerby-util-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hado
op/hdfs/lib/kerb-common-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerb-crypto-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-io-2.5.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerb-util-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/token-provider-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerb-admin-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerb-server-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerb-identity-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/kerby-xdr-1.0.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jersey-core-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jsr311-api-1.1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jersey-server-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/javax.servlet-api-3.1.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/json-simple-1.1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-server-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-http-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-util-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-io-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-webapp-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-xml-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-servlet-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jetty-security-9.3.24.v20180605.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/hadoop-annotations-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-cli-1.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-math3-3.1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-net-3.6.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-collections-3.2.2.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jettison-1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jaxb-impl-2.2.3-1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jaxb-api-2.2.11.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jackson-core-asl-1.9.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jackson-mapper-asl-1.9.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jackson-jaxrs-1.9.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jackson-xc-1.9.13.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-lang-2.6.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-beanutils-1.9.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-configuration2-2.1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-lang3-3.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/avro-1.7.7.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/paranamer-2.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/snappy-java-1.0.5.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/commons-compress-1.18.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/re2j-1.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/gson-2.2.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jsch-0.1.54.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/curator-recipes-2.13.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/htrace-core4-4.1.0-incubating.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jackson-databind-2.7.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/jackson-annotations-2.7.8.jar:/opt/mod
ule/hadoop-3.1.3/share/hadoop/hdfs/lib/jackson-core-2.7.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/stax2-api-3.1.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/lib/woodstox-core-5.0.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-3.1.3-tests.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-nfs-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-client-3.1.3-tests.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-client-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-native-client-3.1.3-tests.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-native-client-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-rbf-3.1.3-tests.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-rbf-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/hdfs/hadoop-hdfs-httpfs-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/lib/hamcrest-core-1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/lib/junit-4.11.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-app-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-common-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-core-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.1.3-tests.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-nativetask-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-client-uploader-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/HikariCP-java7-2.4.12.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/aopalliance-1.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/dnsjava-2.1.7.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/ehcache-3.3.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/fst-2.50.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/geronimo-jcache_1.0_spec-1.0-alpha-1.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/guice-4.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/guice-servlet-4.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/jackson-jaxrs-base-2.7.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/jackson-jaxrs-json-provider-2.7.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/jackson-module-jaxb-annotations-2.7.8.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/java-util-1.9.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/javax.inject-1.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/jersey-client-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/jersey-guice-1.19.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/json-io-2.5.1.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/metrics-core-3.2.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/mssql-jdbc-6.2.1.jre7.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/objenesis-1.0.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/lib/snakeyaml-1.16.jar:/opt/module/hadoop-3.1.3/share/hadoop/y
arn/lib/swagger-annotations-1.5.4.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-api-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-client-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-common-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-registry-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-applicationhistoryservice-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-common-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-nodemanager-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-router-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-sharedcachemanager-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-tests-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-timeline-pluginstorage-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-server-web-proxy-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-services-api-3.1.3.jar:/opt/module/hadoop-3.1.3/share/hadoop/yarn/hadoop-yarn-services-core-3.1.3.jar:/opt/module/hadoop-3.1.3:/opt/module/hadoop-3.1.3/etc/hadoop::/opt/module/hbase/conf
2023-06-29 21:22:16,451 INFO org.apache.flink.client.cli.CliFrontend [] - --------------------------------------------------------------------------------
step1: find the configuration directory
final String configurationDirectory = getConfigurationDirectoryFromEnv();
The method:
public static String getConfigurationDirectoryFromEnv() {
String location = System.getenv(ConfigConstants.ENV_FLINK_CONF_DIR);
if (location != null) {
if (new File(location).exists()) {
return location;
} else {
throw new RuntimeException(
"The configuration directory '"
+ location
+ "', specified in the '"
+ ConfigConstants.ENV_FLINK_CONF_DIR
+ "' environment variable, does not exist.");
}
} else if (new File(CONFIG_DIRECTORY_FALLBACK_1).exists()) {
location = CONFIG_DIRECTORY_FALLBACK_1;
} else if (new File(CONFIG_DIRECTORY_FALLBACK_2).exists()) {
location = CONFIG_DIRECTORY_FALLBACK_2;
} else {
throw new RuntimeException(
"The configuration directory was not specified. "
+ "Please specify the directory containing the configuration file through the '"
+ ConfigConstants.ENV_FLINK_CONF_DIR
+ "' environment variable.");
}
return location;
}
This locates the Flink conf directory. In my setup ENV_FLINK_CONF_DIR is not set,
so the code falls through to CONFIG_DIRECTORY_FALLBACK_1 = "../conf".
My configurationDirectory is therefore /opt/module/flink-1.17.1/conf,
which contains flink-conf.yaml together with the log4j/logback configuration files.
step2: load the global configuration from the configurationDirectory we just obtained
// 2. load the global configuration
final Configuration configuration =
GlobalConfiguration.loadConfiguration(configurationDirectory);
Following the call:
public static Configuration loadConfiguration(final String configDir) {
return loadConfiguration(configDir, null);
}
And one level deeper:
public static Configuration loadConfiguration(
final String configDir, @Nullable final Configuration dynamicProperties) {
if (configDir == null) {
throw new IllegalArgumentException(
"Given configuration directory is null, cannot load configuration");
}
final File confDirFile = new File(configDir);
if (!(confDirFile.exists())) {
throw new IllegalConfigurationException(
"The given configuration directory name '"
+ configDir
+ "' ("
+ confDirFile.getAbsolutePath()
+ ") does not describe an existing directory.");
}
// get Flink yaml configuration file
// load flink-conf.yaml
final File yamlConfigFile = new File(confDirFile, FLINK_CONF_FILENAME);
if (!yamlConfigFile.exists()) {
throw new IllegalConfigurationException(
"The Flink config file '"
+ yamlConfigFile
+ "' ("
+ confDirFile.getAbsolutePath()
+ ") does not exist.");
}
Configuration configuration = loadYAMLResource(yamlConfigFile);
if (dynamicProperties != null) {
configuration.addAll(dynamicProperties);
}
return configuration;
}
This loads the flink-conf.yaml file and turns it into a Configuration object.
Flink's Configuration class is essentially still a map:
protected final HashMap<String, Object> confData;
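As a quick illustration of that map-like behaviour, here is a minimal sketch (it only assumes a Flink 1.17 dependency on the classpath; the keys are just examples):

import org.apache.flink.configuration.Configuration;

public class ConfigurationSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // setString/getString simply write to and read from the internal confData map
        conf.setString("parallelism.default", "1");
        System.out.println(conf.getString("parallelism.default", "unset"));    // 1
        System.out.println(conf.getString("jobmanager.rpc.address", "unset")); // unset
    }
}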
Here you can also see what the actual flink-conf.yaml looks like:
# The external address of the host on which the JobManager runs and can be
# reached by the TaskManagers and any clients which want to connect. This setting
# is only used in Standalone mode and may be overwritten on the JobManager side
# by specifying the --host <hostname> parameter of the bin/jobmanager.sh executable.
# In high availability mode, if you use the bin/start-cluster.sh script and setup
# the conf/masters file, this will be taken care of automatically. Yarn
# automatically configure the host name based on the hostname of the node where the
# JobManager runs.
jobmanager.rpc.address: hadoop102
# The RPC port where the JobManager is reachable.
jobmanager.rpc.port: 6123
# The host interface the JobManager will bind to. By default, this is localhost, and will prevent
# the JobManager from communicating outside the machine/container it is running on.
# On YARN this setting will be ignored if it is set to 'localhost', defaulting to 0.0.0.0.
# On Kubernetes this setting will be ignored, defaulting to 0.0.0.0.
#
# To enable this, set the bind-host address to one that has access to an outside facing network
# interface, such as 0.0.0.0.
jobmanager.bind-host: 0.0.0.0
# The total process memory size for the JobManager.
#
# Note this accounts for all memory usage within the JobManager process, including JVM metaspace and other overhead.
jobmanager.memory.process.size: 1600m
# The host interface the TaskManager will bind to. By default, this is localhost, and will prevent
# the TaskManager from communicating outside the machine/container it is running on.
# On YARN this setting will be ignored if it is set to 'localhost', defaulting to 0.0.0.0.
# On Kubernetes this setting will be ignored, defaulting to 0.0.0.0.
#
# To enable this, set the bind-host address to one that has access to an outside facing network
# interface, such as 0.0.0.0.
taskmanager.bind-host: 0.0.0.0
# The address of the host on which the TaskManager runs and can be reached by the JobManager and
# other TaskManagers. If not specified, the TaskManager will try different strategies to identify
# the address.
#
# Note this address needs to be reachable by the JobManager and forward traffic to one of
# the interfaces the TaskManager is bound to (see 'taskmanager.bind-host').
#
# Note also that unless all TaskManagers are running on the same machine, this address needs to be
# configured separately for each TaskManager.
taskmanager.host: hadoop102
# The total process memory size for the TaskManager.
#
# Note this accounts for all memory usage within the TaskManager process, including JVM metaspace and other overhead.
taskmanager.memory.process.size: 1728m
# To exclude JVM metaspace and overhead, please, use total Flink memory size instead of 'taskmanager.memory.process.size'.
# It is not recommended to set both 'taskmanager.memory.process.size' and Flink memory.
#
# taskmanager.memory.flink.size: 1280m
# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline.
taskmanager.numberOfTaskSlots: 4
# The parallelism used for programs that did not specify and other parallelism.
parallelism.default: 1
# The default file system scheme and authority.
#
# By default file paths without scheme are interpreted relative to the local
# root file system 'file:///'. Use this to override the default and interpret
# relative paths relative to a different file system,
# for example 'hdfs://mynamenode:12345'
#
# fs.default-scheme
#==============================================================================
# High Availability
#==============================================================================
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
#
# high-availability.type: zookeeper
# The path where metadata for master recovery is persisted. While ZooKeeper stores
# the small ground truth for checkpoint and leader election, this location stores
# the larger objects, like persisted dataflow graphs.
#
# Must be a durable file system that is accessible from all nodes
# (like HDFS, S3, Ceph, nfs, ...)
#
# high-availability.storageDir: hdfs:///flink/ha/
# The list of ZooKeeper quorum peers that coordinate the high-availability
# setup. This must be a list of the form:
# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
#
# high-availability.zookeeper.quorum: localhost:2181
# ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
# The default value is "open" and it can be changed to "creator" if ZK security is enabled
#
# high-availability.zookeeper.client.acl: open
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================
# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled. Checkpointing is enabled when execution.checkpointing.interval > 0.
#
# Execution checkpointing related parameters. Please refer to CheckpointConfig and ExecutionCheckpointingOptions for more details.
#
# execution.checkpointing.interval: 3min
# execution.checkpointing.externalized-checkpoint-retention: [DELETE_ON_CANCELLATION, RETAIN_ON_CANCELLATION]
# execution.checkpointing.max-concurrent-checkpoints: 1
# execution.checkpointing.min-pause: 0
# execution.checkpointing.mode: [EXACTLY_ONCE, AT_LEAST_ONCE]
# execution.checkpointing.timeout: 10min
# execution.checkpointing.tolerable-failed-checkpoints: 0
# execution.checkpointing.unaligned: false
#
# Supported backends are 'hashmap', 'rocksdb', or the
# <class-name-of-factory>.
#
# state.backend.type: hashmap
# Directory for checkpoints filesystem, when using any of the default bundled
# state backends.
#
# state.checkpoints.dir: hdfs://namenode-host:port/flink-checkpoints
# Default target directory for savepoints, optional.
#
# state.savepoints.dir: hdfs://namenode-host:port/flink-savepoints
# Flag to enable/disable incremental checkpoints for backends that
# support incremental checkpoints (like the RocksDB state backend).
#
# state.backend.incremental: false
# The failover strategy, i.e., how the job computation recovers from task failures.
# Only restart tasks that may have been affected by the task failure, which typically includes
# downstream tasks and potentially upstream tasks if their produced data is no longer available for consumption.
jobmanager.execution.failover-strategy: region
#==============================================================================
# Rest & web frontend
#==============================================================================
# The port to which the REST client connects to. If rest.bind-port has
# not been specified, then the server will bind to this port as well.
#
#rest.port: 8081
# The address to which the REST client will connect to
#
rest.address: hadoop102
# Port range for the REST and web server to bind to.
#
#rest.bind-port: 8080-8090
# The address that the REST & web server binds to
# By default, this is localhost, which prevents the REST & web server from
# being able to communicate outside of the machine/container it is running on.
#
# To enable this, set the bind address to one that has access to outside-facing
# network interface, such as 0.0.0.0.
#
rest.bind-address: 0.0.0.0
These are all ordinary Flink settings, and they can also be overridden on the command line (for example as -D dynamic properties).
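To see how such command-line overrides end up in the configuration, here is a minimal sketch based on the two-argument loadConfiguration overload shown above (the conf path and the overridden key are just the examples from this walkthrough):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.GlobalConfiguration;

public class DynamicPropertiesSketch {
    public static void main(String[] args) {
        // pretend this came from a -Dparallelism.default=4 argument on the command line
        Configuration dynamicProperties = new Configuration();
        dynamicProperties.setString("parallelism.default", "4");

        // flink-conf.yaml is loaded first, then the dynamic properties are added on top and win
        Configuration conf =
                GlobalConfiguration.loadConfiguration("/opt/module/flink-1.17.1/conf", dynamicProperties);
        System.out.println(conf.getString("parallelism.default", "1")); // 4
    }
}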
step3: load the custom command lines
At this point we have a Configuration built from flink-conf.yaml.
It is passed in together with the corresponding directory /opt/module/flink-1.17.1/conf.
final List<CustomCommandLine> customCommandLines =
loadCustomCommandLines(configuration, configurationDirectory);
The method:
public static List<CustomCommandLine> loadCustomCommandLines(
Configuration configuration, String configurationDirectory) {
List<CustomCommandLine> customCommandLines = new ArrayList<>();
// first, add the generic command line
customCommandLines.add(new GenericCLI(configuration, configurationDirectory));
// Command line interface of the YARN session, with a special initialization here
// to prefix all options with y/yarn.
final String flinkYarnSessionCLI = "org.apache.flink.yarn.cli.FlinkYarnSessionCli";
// then load the YARN command line via reflection
try {
customCommandLines.add(
loadCustomCommandLine(
flinkYarnSessionCLI,
configuration,
configurationDirectory,
"y",
"yarn"));
} catch (NoClassDefFoundError | Exception e) {
final String errorYarnSessionCLI = "org.apache.flink.yarn.cli.FallbackYarnSessionCli";
try {
LOG.info("Loading FallbackYarnSessionCli");
customCommandLines.add(loadCustomCommandLine(errorYarnSessionCLI, configuration));
} catch (Exception exception) {
LOG.warn("Could not load CLI class {}.", flinkYarnSessionCLI, e);
}
}
// Tips: DefaultCLI must be added at last, because getActiveCustomCommandLine(..) will get
// the
// active CustomCommandLine in order and DefaultCLI isActive always return true.
// finally, add the default command line
customCommandLines.add(new DefaultCLI());
return customCommandLines;
}
It returns a list of CustomCommandLine. Let's first look at CustomCommandLine itself, which is an interface.
/** Custom command-line interface to load hooks for the command-line interface. */
public interface CustomCommandLine {
/**
* Signals whether the custom command-line wants to execute or not.
*
* @param commandLine The command-line options
* @return True if the command-line wants to run, False otherwise
*/
boolean isActive(CommandLine commandLine);
/**
* Gets the unique identifier of this CustomCommandLine.
*
* @return A unique identifier
*/
String getId();
/**
* Adds custom options to the existing run options.
*
* @param baseOptions The existing options.
*/
void addRunOptions(Options baseOptions);
/**
* Adds custom options to the existing general options.
*
* @param baseOptions The existing options.
*/
void addGeneralOptions(Options baseOptions);
/**
* Materializes the command line arguments in the given {@link CommandLine} to a {@link
* Configuration} and returns it.
*/
Configuration toConfiguration(CommandLine commandLine) throws FlinkException;
default CommandLine parseCommandLineOptions(String[] args, boolean stopAtNonOptions)
throws CliArgsException {
final Options options = new Options();
addGeneralOptions(options);
addRunOptions(options);
return CliFrontendParser.parse(options, args, stopAtNonOptions);
}
}
This interface encapsulates command-line handling. Implementations of CustomCommandLine are added in order: GenericCLI, FlinkYarnSessionCli, DefaultCLI.
At this point we still don't really know what a CustomCommandLine is for; we only know that three implementations have been put into the list.
We'll come back to it when we run into it again.
step4: create the CliFrontend cli
final CliFrontend cli = new CliFrontend(configuration, customCommandLines);
A look at the constructors:
public CliFrontend(Configuration configuration, List<CustomCommandLine> customCommandLines) {
this(configuration, new DefaultClusterClientServiceLoader(), customCommandLines);
}
public CliFrontend(
Configuration configuration,
ClusterClientServiceLoader clusterClientServiceLoader,
List<CustomCommandLine> customCommandLines) {
this.configuration = checkNotNull(configuration);
this.customCommandLines = checkNotNull(customCommandLines);
this.clusterClientServiceLoader = checkNotNull(clusterClientServiceLoader);
// 2. initialize the file system
FileSystem.initialize(
configuration, PluginUtils.createPluginManagerFromRootFolder(configuration));
this.customCommandLineOptions = new Options();
// 3. add each custom command line's options to the Options object
for (CustomCommandLine customCommandLine : customCommandLines) {
customCommandLine.addGeneralOptions(customCommandLineOptions);
customCommandLine.addRunOptions(customCommandLineOptions);
}
this.clientTimeout = configuration.get(ClientOptions.CLIENT_TIMEOUT);
this.defaultParallelism = configuration.getInteger(CoreOptions.DEFAULT_PARALLELISM);
}
The first constructor delegates to an overloaded one, passing in a newly created DefaultClusterClientServiceLoader.
In Apache Flink, the role of DefaultClusterClientServiceLoader is to discover and instantiate ClusterClientFactory implementations, which is the entry point for creating Flink cluster clients.
We'll look at how it is used in detail later; for now, a quick note on the mechanism it builds on.
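As a rough illustration (not Flink's actual code), this loader is built on Java's standard ServiceLoader mechanism: implementations of an SPI interface are listed under META-INF/services and discovered at runtime. A minimal sketch of that pattern with a hypothetical Greeter interface standing in for ClusterClientFactory:

import java.util.ServiceLoader;

public class ServiceLoaderSketch {
    // hypothetical SPI interface; in Flink, ClusterClientFactory plays this role
    public interface Greeter {
        String greet();
    }

    public static void main(String[] args) {
        // scans META-INF/services/ServiceLoaderSketch$Greeter on the classpath
        // and instantiates every implementation listed there
        ServiceLoader<Greeter> loader = ServiceLoader.load(Greeter.class);
        for (Greeter greeter : loader) {
            System.out.println(greeter.greet());
        }
    }
}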
Now the second constructor: it receives the three objects and finally sets the client timeout and defaultParallelism. defaultParallelism was set to 1 in flink-conf.yaml, while the timeout was not set and falls back to the default of 60s.
public static final ConfigOption<Duration> CLIENT_TIMEOUT =
ConfigOptions.key("client.timeout")
.durationType()
.defaultValue(Duration.ofSeconds(60))
.withDeprecatedKeys(
"akka.client.timeout") // the deprecated AkkaOptions.CLIENT_TIMEOUT
.withDescription("Timeout on the client side.");
step5: install the security configuration and modules
SecurityUtils.install(new SecurityConfiguration(cli.configuration));
1. Creating the SecurityConfiguration
Let's first analyze how the SecurityConfiguration object is initialized, and then look at the install logic of SecurityUtils.
The constructor:
public SecurityConfiguration(Configuration flinkConf) {
this(
flinkConf,
flinkConf.get(SECURITY_CONTEXT_FACTORY_CLASSES),
flinkConf.get(SECURITY_MODULE_FACTORY_CLASSES));
}
Again it delegates to an overload:
public SecurityConfiguration(
Configuration flinkConf,
List<String> securityContextFactory,
List<String> securityModuleFactories) {
this.isZkSaslDisable = flinkConf.getBoolean(SecurityOptions.ZOOKEEPER_SASL_DISABLE);
this.keytab = flinkConf.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB);
this.principal = flinkConf.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL);
this.useTicketCache = flinkConf.getBoolean(SecurityOptions.KERBEROS_LOGIN_USETICKETCACHE);
this.loginContextNames =
parseList(flinkConf.getString(SecurityOptions.KERBEROS_LOGIN_CONTEXTS));
this.zkServiceName = flinkConf.getString(SecurityOptions.ZOOKEEPER_SASL_SERVICE_NAME);
this.zkLoginContextName =
flinkConf.getString(SecurityOptions.ZOOKEEPER_SASL_LOGIN_CONTEXT_NAME);
this.securityModuleFactories = Collections.unmodifiableList(securityModuleFactories);
this.securityContextFactory = securityContextFactory;
this.flinkConfig = checkNotNull(flinkConf);
validate();
}
isZkSaslDisable: we did not set it, so it defaults to false.
keytab: not set, and there is no default.
zkServiceName: defaults to zookeeper.
zkLoginContextName: defaults to Client.
Essentially none of these options are set here.
Now look at the last two arguments, securityContextFactory and securityModuleFactories.
securityContextFactory defaults to these two classes:
"org.apache.flink.runtime.security.contexts.HadoopSecurityContextFactory",
"org.apache.flink.runtime.security.contexts.NoOpSecurityContextFactory"
and securityModuleFactories to these three:
"org.apache.flink.runtime.security.modules.HadoopModuleFactory",
"org.apache.flink.runtime.security.modules.JaasModuleFactory",
"org.apache.flink.runtime.security.modules.ZookeeperModuleFactory"
If security.kerberos.login.keytab is set in the global configuration (flink-conf.yaml), the constructor checks that the file it points to exists and is readable; it helps to know a little about Kerberos authentication here.
Finally validate() is called; since our keytab is empty, it returns right away.
private void validate() {
if (!StringUtils.isBlank(keytab)) {
// principal is required
if (StringUtils.isBlank(principal)) {
throw new IllegalConfigurationException(
"Kerberos login configuration is invalid: keytab requires a principal.");
}
// check the keytab is readable
File keytabFile = new File(keytab);
if (!keytabFile.exists() || !keytabFile.isFile()) {
throw new IllegalConfigurationException(
"Kerberos login configuration is invalid: keytab ["
+ keytab
+ "] doesn't exist!");
} else if (!keytabFile.canRead()) {
throw new IllegalConfigurationException(
"Kerberos login configuration is invalid: keytab ["
+ keytab
+ "] is unreadable!");
}
}
}
In other words, on top of the original configuration we have now recorded the class names of a few security factories, producing a SecurityConfiguration.
2.SecurityUtils.install
public static void install(SecurityConfiguration config) throws Exception {
// Install the security modules first before installing the security context
installModules(config);
installContext(config);
}
These are exactly the securityContextFactory and securityModuleFactories we just saw.
Let's look at installModules first.
static void installModules(SecurityConfiguration config) throws Exception {
// install the security module factories
List<SecurityModule> modules = new ArrayList<>();
for (String moduleFactoryClass : config.getSecurityModuleFactories()) {
SecurityModuleFactory moduleFactory = null;
try {
moduleFactory = SecurityFactoryServiceLoader.findModuleFactory(moduleFactoryClass);
} catch (NoMatchSecurityFactoryException ne) {
LOG.error("Unable to instantiate security module factory {}", moduleFactoryClass);
throw new IllegalArgumentException("Unable to find module factory class", ne);
}
SecurityModule module = moduleFactory.createModule(config);
// can be null if a SecurityModule is not supported in the current environment
if (module != null) {
module.install();
modules.add(module);
}
}
installedModules = modules;
}
The security modules installed here mainly cover the Java Authentication and Authorization Service (JAAS), Hadoop UserGroupInformation (UGI), and the full security setup for ZooKeeper.
We won't go into the details here.
step6: now for the core logic
int retCode = SecurityUtils.getInstalledContext().runSecured(() -> cli.parseAndRun(args));
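Before looking at parseAndRun, a quick note on runSecured: the installed SecurityContext decides under which identity the callable runs. Below is a simplified sketch of the idea (the class names are illustrative, not Flink's exact code); the Hadoop-based context wraps the call in UserGroupInformation.doAs, while the no-op context just invokes it directly.

import java.util.concurrent.Callable;

public class SecurityContextSketch {
    // simplified stand-in for Flink's SecurityContext interface
    interface Context {
        <T> T runSecured(Callable<T> securedCallable) throws Exception;
    }

    // no-op variant: run the callable directly as the current OS user
    static class NoOpContext implements Context {
        @Override
        public <T> T runSecured(Callable<T> securedCallable) throws Exception {
            return securedCallable.call();
        }
    }

    public static void main(String[] args) throws Exception {
        int retCode = new NoOpContext().runSecured(() -> 0);
        System.out.println(retCode); // 0
    }
}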
Let's start with parseAndRun(args):
public int parseAndRun(String[] args) {
// check for action
if (args.length < 1) {
CliFrontendParser.printHelp(customCommandLines);
System.out.println("Please specify an action.");
return 1;
}
// get action
String action = args[0];
// remove action from parameters
final String[] params = Arrays.copyOfRange(args, 1, args.length);
try {
// do action
switch (action) {
case ACTION_RUN:
run(params);
return 0;
case ACTION_RUN_APPLICATION:
runApplication(params);
return 0;
case ACTION_LIST:
list(params);
return 0;
case ACTION_INFO:
info(params);
return 0;
case ACTION_CANCEL:
cancel(params);
return 0;
case ACTION_STOP:
stop(params);
return 0;
case ACTION_SAVEPOINT:
savepoint(params);
return 0;
case "-h":
case "--help":
CliFrontendParser.printHelp(customCommandLines);
return 0;
case "-v":
case "--version":
String version = EnvironmentInformation.getVersion();
String commitID = EnvironmentInformation.getRevisionInformation().commitId;
System.out.print("Version: " + version);
System.out.println(
commitID.equals(EnvironmentInformation.UNKNOWN)
? ""
: ", Commit ID: " + commitID);
return 0;
default:
System.out.printf("\"%s\" is not a valid action.\n", action);
System.out.println();
System.out.println(
"Valid actions are \"run\", \"run-application\", \"list\", \"info\", \"savepoint\", \"stop\", or \"cancel\".");
System.out.println();
System.out.println(
"Specify the version option (-v or --version) to print Flink version.");
System.out.println();
System.out.println(
"Specify the help option (-h or --help) to get help on the command.");
return 1;
}
} catch (CliArgsException ce) {
return handleArgException(ce);
} catch (ProgramParametrizationException ppe) {
return handleParametrizationException(ppe);
} catch (ProgramMissingJobException pmje) {
return handleMissingJobException();
} catch (Exception e) {
return handleError(e);
}
}
Here our first arg is run-application, so runApplication(params) is called, where params is the remaining args with the first element removed.
The remaining args are just: -t yarn-application ../examples/batch/WordCount.jar
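A tiny worked example of that splitting, using the values from this walkthrough:

import java.util.Arrays;

public class ArgSplitSketch {
    public static void main(String[] ignored) {
        String[] args = {"run-application", "-t", "yarn-application", "../examples/batch/WordCount.jar"};
        String action = args[0];                                     // "run-application"
        String[] params = Arrays.copyOfRange(args, 1, args.length);
        System.out.println(action + " -> " + Arrays.toString(params));
        // prints: run-application -> [-t, yarn-application, ../examples/batch/WordCount.jar]
    }
}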
Continuing with runApplication:
protected void runApplication(String[] args) throws Exception {
LOG.info("Running 'run-application' command.");
final Options commandOptions = CliFrontendParser.getRunCommandOptions();
final CommandLine commandLine = getCommandLine(commandOptions, args, true);
if (commandLine.hasOption(HELP_OPTION.getOpt())) {
CliFrontendParser.printHelpForRunApplication(customCommandLines);
return;
}
final CustomCommandLine activeCommandLine =
validateAndGetActiveCommandLine(checkNotNull(commandLine));
final ApplicationDeployer deployer =
new ApplicationClusterDeployer(clusterClientServiceLoader);
final ProgramOptions programOptions;
final Configuration effectiveConfiguration;
// No need to set a jarFile path for Pyflink job.
if (ProgramOptionsUtils.isPythonEntryPoint(commandLine)) {
programOptions = ProgramOptionsUtils.createPythonProgramOptions(commandLine);
effectiveConfiguration =
getEffectiveConfiguration(
activeCommandLine,
commandLine,
programOptions,
Collections.emptyList());
} else {
programOptions = new ProgramOptions(commandLine);
programOptions.validate();
final URI uri = PackagedProgramUtils.resolveURI(programOptions.getJarFilePath());
effectiveConfiguration =
getEffectiveConfiguration(
activeCommandLine,
commandLine,
programOptions,
Collections.singletonList(uri.toString()));
}
final ApplicationConfiguration applicationConfiguration =
new ApplicationConfiguration(
programOptions.getProgramArgs(), programOptions.getEntryPointClassName());
deployer.run(effectiveConfiguration, applicationConfiguration);
}
1.CliFrontendParser.getRunCommandOptions();
Look at the first line. It mainly registers the command-line option definitions that can be recognized.
final Options commandOptions = CliFrontendParser.getRunCommandOptions();
Following it:
public static Options getRunCommandOptions() {
// add the flink -h/--help option
Options options = buildGeneralOptions(new Options());
options = getProgramSpecificOptions(options);
options.addOption(SAVEPOINT_PATH_OPTION);
return options.addOption(SAVEPOINT_ALLOW_NON_RESTORED_OPTION);
}
This adds the program-specific options that can be parsed from the command line; for example the first one, JAR_OPTION, recognizes -j (see the sketch after this code block).
private static Options getProgramSpecificOptions(Options options) {
options.addOption(JAR_OPTION);
options.addOption(CLASS_OPTION);
options.addOption(CLASSPATH_OPTION);
options.addOption(PARALLELISM_OPTION);
options.addOption(ARGS_OPTION);
options.addOption(DETACHED_OPTION);
options.addOption(SHUTDOWN_IF_ATTACHED_OPTION);
options.addOption(YARN_DETACHED_OPTION);
options.addOption(PY_OPTION);
options.addOption(PYFILES_OPTION);
options.addOption(PYMODULE_OPTION);
options.addOption(PYREQUIREMENTS_OPTION);
options.addOption(PYARCHIVE_OPTION);
options.addOption(PYEXEC_OPTION);
return options;
}
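These option constants come from Apache Commons CLI. As a rough illustration (the short/long names and description below are assumptions, not necessarily Flink's exact definitions), an option like JAR_OPTION is declared and registered roughly like this:

import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;

public class OptionDefinitionSketch {
    public static void main(String[] args) {
        // short name "j", long name "jarfile", takes one argument
        Option jarOption = new Option("j", "jarfile", true, "Flink program JAR file.");
        Options options = new Options();
        options.addOption(jarOption);
        System.out.println(options.hasOption("j")); // true
    }
}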
2.getCommandLine(commandOptions, args, true);
Now the second line of code:
final CommandLine commandLine = getCommandLine(commandOptions, args, true);
The method:
public CommandLine getCommandLine(
final Options commandOptions, final String[] args, final boolean stopAtNonOptions)
throws CliArgsException {
final Options commandLineOptions =
CliFrontendParser.mergeOptions(commandOptions, customCommandLineOptions);
return CliFrontendParser.parse(commandLineOptions, args, stopAtNonOptions);
}
First look at mergeOptions. The customCommandLineOptions here is the Options object that the CliFrontend constructor filled through each custom command line's addGeneralOptions/addRunOptions; among other things, this is where the -t/--target option is registered.
public static Options mergeOptions(@Nullable Options optionsA, @Nullable Options optionsB) {
final Options resultOptions = new Options();
if (optionsA != null) {
for (Option option : optionsA.getOptions()) {
resultOptions.addOption(option);
}
}
if (optionsB != null) {
for (Option option : optionsB.getOptions()) {
resultOptions.addOption(option);
}
}
return resultOptions;
}
So we only need to look at the final return CliFrontendParser.parse(commandLineOptions, args, stopAtNonOptions);
public static CommandLine parse(Options options, String[] args, boolean stopAtNonOptions)
throws CliArgsException {
final DefaultParser parser = new DefaultParser();
try {
return parser.parse(options, args, stopAtNonOptions);
} catch (ParseException e) {
throw new CliArgsException(e.getMessage());
}
}
Continuing:
public CommandLine parse(Options options, String[] arguments, boolean stopAtNonOption) throws ParseException {
return this.parse(options, arguments, (Properties)null, stopAtNonOption);
}
And further down (this is Commons CLI's DefaultParser):
public CommandLine parse(Options options, String[] arguments, Properties properties, boolean stopAtNonOption) throws ParseException {
this.options = options;
this.stopAtNonOption = stopAtNonOption;
this.skipParsing = false;
this.currentOption = null;
this.expectedOpts = new ArrayList(options.getRequiredOptions());
Iterator var5 = options.getOptionGroups().iterator();
while(var5.hasNext()) {
OptionGroup group = (OptionGroup)var5.next();
group.setSelected((Option)null);
}
this.cmd = new CommandLine();
if (arguments != null) {
String[] var9 = arguments;
int var10 = arguments.length;
for(int var7 = 0; var7 < var10; ++var7) {
String argument = var9[var7];
this.handleToken(argument);
}
}
this.checkRequiredArgs();
this.handleProperties(properties);
this.checkRequiredOptions();
return this.cmd;
}
options.getOptionGroups() should be empty; we never did anything with option groups, our Options object was only ever built through addOption.
Next each argument is handed to handleToken. Recall what the original args were:
run-application -t yarn-application ../examples/batch/WordCount.jar
run-application has already been consumed and removed, so what is left is:
-t yarn-application ../examples/batch/WordCount.jar
private void handleToken(String token) throws ParseException {
this.currentToken = token;
if (this.skipParsing) {
this.cmd.addArg(token);
} else if ("--".equals(token)) {
this.skipParsing = true;
} else if (this.currentOption != null && this.currentOption.acceptsArg() && this.isArgument(token)) {
this.currentOption.addValueForProcessing(Util.stripLeadingAndTrailingQuotes(token));
} else if (token.startsWith("--")) {
this.handleLongOption(token);
} else if (token.startsWith("-") && !"-".equals(token)) {
this.handleShortAndLongOption(token);
} else {
this.handleUnknownToken(token);
}
if (this.currentOption != null && !this.currentOption.acceptsArg()) {
this.currentOption = null;
}
}
Here you can see the classification: a single - marks a short option and -- a long option.
The parsing mostly matches the registered options against the args we typed.
At this point commandLine contains the target option (-t yarn-application), and the jar path is left over as an unparsed argument.
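To make that concrete, here is a small Commons CLI sketch that parses the same three tokens (the -t/--target option mirrors the one registered by GenericCLI; treat the exact names as an assumption):

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;

public class ParseSketch {
    public static void main(String[] ignored) throws Exception {
        Options options = new Options();
        options.addOption(new Option("t", "target", true, "deployment target"));

        String[] args = {"-t", "yarn-application", "../examples/batch/WordCount.jar"};
        CommandLine cmd = new DefaultParser().parse(options, args, true); // stopAtNonOptions = true

        System.out.println(cmd.getOptionValue("t")); // yarn-application
        System.out.println(cmd.getArgList());        // [../examples/batch/WordCount.jar]
    }
}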
3.validateAndGetActiveCommandLine(checkNotNull(commandLine))
This picks the active command line; recall the order from before: generic, yarn, default.
public CustomCommandLine validateAndGetActiveCommandLine(CommandLine commandLine) {
LOG.debug("Custom commandlines: {}", customCommandLines);
for (CustomCommandLine cli : customCommandLines) {
LOG.debug(
"Checking custom commandline {}, isActive: {}", cli, cli.isActive(commandLine));
if (cli.isActive(commandLine)) {
return cli;
}
}
throw new IllegalStateException("No valid command-line found.");
}
GenericCLI's isActive implementation:
@Override
public boolean isActive(CommandLine commandLine) {
return configuration.getOptional(DeploymentOptions.TARGET).isPresent()
|| commandLine.hasOption(executorOption.getOpt())
|| commandLine.hasOption(targetOption.getOpt());
}
Because we passed -t yarn-application on the command line, the GenericCLI implementation is selected here.
4.new ApplicationClusterDeployer(clusterClientServiceLoader)
final ApplicationDeployer deployer =
new ApplicationClusterDeployer(clusterClientServiceLoader);
The clusterClientServiceLoader is the one created earlier in the constructor; nothing special was configured on it.
The ApplicationClusterDeployer constructor likewise just stores this default loader and does nothing else.
public ApplicationClusterDeployer(final ClusterClientServiceLoader clientServiceLoader) {
this.clientServiceLoader = checkNotNull(clientServiceLoader);
}
5. new ProgramOptions(commandLine)
programOptions = new ProgramOptions(commandLine);
programOptions.validate();
The ProgramOptions constructor:
protected ProgramOptions(CommandLine line) throws CliArgsException {
super(line);
this.entryPointClass =
line.hasOption(CLASS_OPTION.getOpt())
? line.getOptionValue(CLASS_OPTION.getOpt())
: null;
this.jarFilePath =
line.hasOption(JAR_OPTION.getOpt())
? line.getOptionValue(JAR_OPTION.getOpt())
: null;
this.programArgs = extractProgramArgs(line);
List<URL> classpaths = new ArrayList<URL>();
if (line.hasOption(CLASSPATH_OPTION.getOpt())) {
for (String path : line.getOptionValues(CLASSPATH_OPTION.getOpt())) {
try {
classpaths.add(new URL(path));
} catch (MalformedURLException e) {
throw new CliArgsException("Bad syntax for classpath: " + path);
}
}
}
this.classpaths = classpaths;
if (line.hasOption(PARALLELISM_OPTION.getOpt())) {
String parString = line.getOptionValue(PARALLELISM_OPTION.getOpt());
try {
parallelism = Integer.parseInt(parString);
if (parallelism <= 0) {
throw new NumberFormatException();
}
} catch (NumberFormatException e) {
throw new CliArgsException(
"The parallelism must be a positive number: " + parString);
}
} else {
parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
}
detachedMode =
line.hasOption(DETACHED_OPTION.getOpt())
|| line.hasOption(YARN_DETACHED_OPTION.getOpt());
shutdownOnAttachedExit = line.hasOption(SHUTDOWN_IF_ATTACHED_OPTION.getOpt());
this.savepointSettings = CliFrontendParser.createSavepointRestoreSettings(line);
}
First, the parent class constructor:
protected CommandLineOptions(CommandLine line) {
this.printHelp = line.hasOption(HELP_OPTION.getOpt());
}
It only records whether the help option was given, so we can skip it.
Now let's go through the constructor piece by piece.
this.entryPointClass =
line.hasOption(CLASS_OPTION.getOpt())
? line.getOptionValue(CLASS_OPTION.getOpt())
: null;
Our command line did not specify -c (CLASS_OPTION), so entryPointClass is null.
this.jarFilePath =
line.hasOption(JAR_OPTION.getOpt())
? line.getOptionValue(JAR_OPTION.getOpt())
: null;
Likewise, -j (JAR_OPTION) was not specified, so jarFilePath is null for now.
On to the next line:
this.programArgs = extractProgramArgs(line);
protected String[] extractProgramArgs(CommandLine line) {
String[] args =
line.hasOption(ARGS_OPTION.getOpt())
? line.getOptionValues(ARGS_OPTION.getOpt())
: line.getArgs();
if (args.length > 0 && !line.hasOption(JAR_OPTION.getOpt())) {
jarFilePath = args[0];
args = Arrays.copyOfRange(args, 1, args.length);
}
return args;
}
ARGS_OPTION was not set either, so look at line.getArgs():
public String[] getArgs() {
String[] answer = new String[this.args.size()];
this.args.toArray(answer);
return answer;
}
It simply turns the internal args list into a String[].
Jumping back out to extractProgramArgs:
jarFilePath = args[0] finally sets the jar path to ../examples/batch/WordCount.jar, and the remaining program arguments become empty.
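A small illustration of what extractProgramArgs does for our invocation (the values are the ones from this walkthrough; the class name is made up):
import java.util.Arrays;

public class ExtractArgsSketch {
    public static void main(String[] args) {
        String[] leftover = {"../examples/batch/WordCount.jar"}; // what line.getArgs() returned
        String jarFilePath = null;
        if (leftover.length > 0) {                               // and no -j/--jarfile was given
            jarFilePath = leftover[0];                           // ../examples/batch/WordCount.jar
            leftover = Arrays.copyOfRange(leftover, 1, leftover.length); // program args -> empty
        }
        System.out.println(jarFilePath + " / remaining args: " + leftover.length);
    }
}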
Continuing:
List<URL> classpaths = new ArrayList<URL>();
if (line.hasOption(CLASSPATH_OPTION.getOpt())) {
for (String path : line.getOptionValues(CLASSPATH_OPTION.getOpt())) {
try {
classpaths.add(new URL(path));
} catch (MalformedURLException e) {
throw new CliArgsException("Bad syntax for classpath: " + path);
}
}
}
this.classpaths = classpaths;
We did not pass -C either, so classpaths stays empty.
if (line.hasOption(PARALLELISM_OPTION.getOpt())) {
String parString = line.getOptionValue(PARALLELISM_OPTION.getOpt());
try {
parallelism = Integer.parseInt(parString);
if (parallelism <= 0) {
throw new NumberFormatException();
}
} catch (NumberFormatException e) {
throw new CliArgsException(
"The parallelism must be a positive number: " + parString);
}
} else {
parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
}
-p was not set either,
so parallelism falls back to ExecutionConfig.PARALLELISM_DEFAULT, which is -1.
detachedMode =
line.hasOption(DETACHED_OPTION.getOpt())
|| line.hasOption(YARN_DETACHED_OPTION.getOpt());
shutdownOnAttachedExit = line.hasOption(SHUTDOWN_IF_ATTACHED_OPTION.getOpt());
this.savepointSettings = CliFrontendParser.createSavepointRestoreSettings(line);
None of these were set in our case.
Finally, the savepoint settings:
public static SavepointRestoreSettings createSavepointRestoreSettings(CommandLine commandLine) {
if (commandLine.hasOption(SAVEPOINT_PATH_OPTION.getOpt())) {
String savepointPath = commandLine.getOptionValue(SAVEPOINT_PATH_OPTION.getOpt());
boolean allowNonRestoredState =
commandLine.hasOption(SAVEPOINT_ALLOW_NON_RESTORED_OPTION.getOpt());
return SavepointRestoreSettings.forPath(savepointPath, allowNonRestoredState);
} else {
return SavepointRestoreSettings.none();
}
}
-s was not set either, so what is ultimately returned is
private static final SavepointRestoreSettings NONE = new SavepointRestoreSettings(null, false);
So in the end we get a ProgramOptions whose only meaningful field is jarFilePath.
6. programOptions.validate();
public void validate() throws CliArgsException {
// Java program should be specified a JAR file
if (getJarFilePath() == null) {
throw new CliArgsException("Java program should be specified a JAR file.");
}
}
This shows that the only thing validation requires is a jarFilePath.
7. PackagedProgramUtils.resolveURI(programOptions.getJarFilePath())
The jar path is passed in.
public static URI resolveURI(String path) throws URISyntaxException {
final URI uri = new URI(path);
if (uri.getScheme() != null) {
return uri;
}
return new File(path).getAbsoluteFile().toURI();
}
If the path has no scheme it is converted into an absolute file URI.
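A worked example of resolveURI for our relative jar path (the printed URI is only indicative and depends on the working directory, flink/bin in this setup):
import java.io.File;
import java.net.URI;

public class ResolveUriSketch {
    public static void main(String[] args) throws Exception {
        String path = "../examples/batch/WordCount.jar";
        URI uri = new URI(path);
        System.out.println(uri.getScheme());              // null -> not an absolute URI
        URI absolute = new File(path).getAbsoluteFile().toURI();
        System.out.println(absolute);                      // e.g. file:/opt/module/flink-1.17.1/bin/../examples/batch/WordCount.jar
    }
}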
8. getEffectiveConfiguration
effectiveConfiguration =
getEffectiveConfiguration(
activeCommandLine,
commandLine,
programOptions,
Collections.singletonList(uri.toString()));
Stepping into it:
private <T> Configuration getEffectiveConfiguration(
final CustomCommandLine activeCustomCommandLine,
final CommandLine commandLine,
final ProgramOptions programOptions,
final List<T> jobJars)
throws FlinkException {
final Configuration effectiveConfiguration =
getEffectiveConfiguration(activeCustomCommandLine, commandLine);
final ExecutionConfigAccessor executionParameters =
ExecutionConfigAccessor.fromProgramOptions(
checkNotNull(programOptions), checkNotNull(jobJars));
executionParameters.applyToConfiguration(effectiveConfiguration);
LOG.debug(
"Effective configuration after Flink conf, custom commandline, and program options: {}",
effectiveConfiguration);
return effectiveConfiguration;
}
1. First, the first line: the two-argument getEffectiveConfiguration
private <T> Configuration getEffectiveConfiguration(
final CustomCommandLine activeCustomCommandLine, final CommandLine commandLine)
throws FlinkException {
final Configuration effectiveConfiguration = new Configuration(configuration);
final Configuration commandLineConfiguration =
checkNotNull(activeCustomCommandLine).toConfiguration(commandLine);
effectiveConfiguration.addAll(commandLineConfiguration);
return effectiveConfiguration;
}
Here configuration shows up again; it is the one built from flink-conf.yaml.
Next, .toConfiguration(commandLine).
Note the difference between activeCustomCommandLine and commandLine:
activeCustomCommandLine is the GenericCLI instance (a CustomCommandLine), which holds the configuration;
commandLine holds the options and args we typed in.
@Override
public Configuration toConfiguration(final CommandLine commandLine) {
final Configuration resultConfiguration = new Configuration();
final String executorName = commandLine.getOptionValue(executorOption.getOpt());
if (executorName != null) {
resultConfiguration.setString(DeploymentOptions.TARGET, executorName);
}
final String targetName = commandLine.getOptionValue(targetOption.getOpt());
if (targetName != null) {
resultConfiguration.setString(DeploymentOptions.TARGET, targetName);
}
DynamicPropertiesUtil.encodeDynamicProperties(commandLine, resultConfiguration);
resultConfiguration.set(DeploymentOptionsInternal.CONF_DIR, configurationDir);
return resultConfiguration;
}
executorOption (-e) was not set, but targetOption (-t) was, so DeploymentOptions.TARGET is set to yarn-application here.
Next, DynamicPropertiesUtil.encodeDynamicProperties(commandLine, resultConfiguration):
static void encodeDynamicProperties(
final CommandLine commandLine, final Configuration effectiveConfiguration) {
final Properties properties = commandLine.getOptionProperties(DYNAMIC_PROPERTIES.getOpt());
properties
.stringPropertyNames()
.forEach(
key -> {
final String value = properties.getProperty(key);
if (value != null) {
effectiveConfiguration.setString(key, value);
} else {
effectiveConfiguration.setString(key, "true");
}
});
}
This picks up the -D dynamic properties; we did not pass any -D key=value pairs.
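Had we passed a dynamic property, it would land in the configuration here. A hypothetical commons-cli sketch of that shape (the real DYNAMIC_PROPERTIES option is defined inside Flink; this only mimics a -Dkey=value option):
import java.util.Properties;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;

public class DynamicPropsSketch {
    public static void main(String[] args) throws Exception {
        Option dynamic = Option.builder("D").hasArgs().valueSeparator('=').build(); // -Dkey=value style
        Options options = new Options().addOption(dynamic);
        CommandLine cmd = new DefaultParser()
                .parse(options, new String[] {"-Dtaskmanager.memory.process.size=2g"});
        Properties props = cmd.getOptionProperties("D");
        System.out.println(props); // {taskmanager.memory.process.size=2g}
    }
}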
Finally the conf dir is recorded:
resultConfiguration.set(DeploymentOptionsInternal.CONF_DIR, configurationDir);
Back in the caller, the configuration from flink-conf.yaml and the configuration derived from the command line are merged:
effectiveConfiguration.addAll(commandLineConfiguration);
Under the hood this is just HashMap.putAll of two maps, so any key coming from flink-conf.yaml that also appears on the command line is overwritten by the command-line value.
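In plain-Java terms, the override behaviour looks like this (a simplified analogy with made-up values, not Flink code; execution.target is the key behind DeploymentOptions.TARGET):
import java.util.HashMap;
import java.util.Map;

public class MergeSketch {
    public static void main(String[] args) {
        Map<String, String> flinkConf = new HashMap<>();
        flinkConf.put("execution.target", "local");                   // hypothetical flink-conf.yaml value
        Map<String, String> fromCommandLine = new HashMap<>();
        fromCommandLine.put("execution.target", "yarn-application");  // derived from -t

        flinkConf.putAll(fromCommandLine);                            // command line wins on duplicate keys
        System.out.println(flinkConf.get("execution.target"));        // yarn-application
    }
}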
2. ExecutionConfigAccessor.fromProgramOptions
The code:
public static <T> ExecutionConfigAccessor fromProgramOptions(
final ProgramOptions options, final List<T> jobJars) {
checkNotNull(options);
checkNotNull(jobJars);
final Configuration configuration = new Configuration();
options.applyToConfiguration(configuration);
ConfigUtils.encodeCollectionToConfig(
configuration, PipelineOptions.JARS, jobJars, Object::toString);
return new ExecutionConfigAccessor(configuration);
}
First, options.applyToConfiguration(configuration):
public void applyToConfiguration(Configuration configuration) {
if (getParallelism() != ExecutionConfig.PARALLELISM_DEFAULT) {
configuration.setInteger(CoreOptions.DEFAULT_PARALLELISM, getParallelism());
}
configuration.setBoolean(DeploymentOptions.ATTACHED, !getDetachedMode());
configuration.setBoolean(
DeploymentOptions.SHUTDOWN_IF_ATTACHED, isShutdownOnAttachedExit());
ConfigUtils.encodeCollectionToConfig(
configuration, PipelineOptions.CLASSPATHS, getClasspaths(), URL::toString);
SavepointRestoreSettings.toConfiguration(getSavepointRestoreSettings(), configuration);
}
Then the second call:
ConfigUtils.encodeCollectionToConfig(
configuration, PipelineOptions.JARS, jobJars, Object::toString);
public static <IN, OUT> void encodeCollectionToConfig(
final WritableConfig configuration,
final ConfigOption<List<OUT>> key,
@Nullable final Collection<IN> values,
final Function<IN, OUT> mapper) {
checkNotNull(configuration);
checkNotNull(key);
checkNotNull(mapper);
if (values == null) {
return;
}
final List<OUT> encodedOption =
values.stream()
.filter(Objects::nonNull)
.map(mapper)
.filter(Objects::nonNull)
.collect(Collectors.toCollection(ArrayList::new));
configuration.set(key, encodedOption);
}
It just encodes the jar URI list into the configuration, under PipelineOptions.JARS (the pipeline.jars key).
Finally it returns new ExecutionConfigAccessor(configuration):
private ExecutionConfigAccessor(final Configuration configuration) {
this.configuration = checkNotNull(configuration);
}
The constructor simply stores the configuration.
3. executionParameters.applyToConfiguration(effectiveConfiguration);
public Configuration applyToConfiguration(final Configuration baseConfiguration) {
baseConfiguration.addAll(configuration);
return baseConfiguration;
}
This merges the two configurations again.
9. new ApplicationConfiguration
final ApplicationConfiguration applicationConfiguration =
new ApplicationConfiguration(
programOptions.getProgramArgs(), programOptions.getEntryPointClassName());
The constructor:
public ApplicationConfiguration(
final String[] programArguments, @Nullable final String applicationClassName) {
this.programArguments = checkNotNull(programArguments);
this.applicationClassName = applicationClassName;
}
EntryPointClassName is not set here; a default will be resolved later. It is the fully qualified class whose main method your own Flink job starts from, e.g. com.xinyuan.WordCount.
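As a rough sketch of how that default is later discovered: when no class is given explicitly, the jar manifest is consulted (this happens inside PackagedProgram). The snippet below only illustrates the idea with the conventional attribute names; it is not the actual Flink lookup code.
import java.util.jar.JarFile;
import java.util.jar.Manifest;

public class EntryClassSketch {
    public static void main(String[] args) throws Exception {
        try (JarFile jar = new JarFile("../examples/batch/WordCount.jar")) {
            Manifest mf = jar.getManifest();                                      // may be null for jars without a manifest
            String programClass = mf.getMainAttributes().getValue("program-class"); // Flink-specific attribute
            String mainClass = mf.getMainAttributes().getValue("Main-Class");       // standard fallback
            System.out.println(programClass != null ? programClass : mainClass);
        }
    }
}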
10. deployer.run
The deployer carries no special state; everything in it came from the default constructor chain.
deployer.run(effectiveConfiguration, applicationConfiguration);
applicationConfiguration holds the program-level settings (program arguments and entry class).
effectiveConfiguration holds the Flink configuration.
Let's look at the details; the actual job submission starts here!
public <ClusterID> void run(
final Configuration configuration,
final ApplicationConfiguration applicationConfiguration)
throws Exception {
checkNotNull(configuration);
checkNotNull(applicationConfiguration);
LOG.info("Submitting application in 'Application Mode'.");
final ClusterClientFactory<ClusterID> clientFactory =
clientServiceLoader.getClusterClientFactory(configuration);
try (final ClusterDescriptor<ClusterID> clusterDescriptor =
clientFactory.createClusterDescriptor(configuration)) {
final ClusterSpecification clusterSpecification =
clientFactory.getClusterSpecification(configuration);
clusterDescriptor.deployApplicationCluster(
clusterSpecification, applicationConfiguration);
}
}
A ClusterClientFactory is created and used to obtain the ClusterDescriptor and the ClusterSpecification; since execution.target is yarn-application, these resolve to the YARN implementations (YarnClusterClientFactory / YarnClusterDescriptor).
Calling deployApplicationCluster means the job is submitted in application mode.
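A simplified sketch of the SPI lookup behind clientServiceLoader.getClusterClientFactory (assumption: the real DefaultClusterClientServiceLoader adds error handling and checks that exactly one factory matches):
import java.util.ServiceLoader;
import org.apache.flink.client.deployment.ClusterClientFactory;
import org.apache.flink.configuration.Configuration;

public class FactoryLookupSketch {
    @SuppressWarnings("rawtypes")
    static ClusterClientFactory findFactory(Configuration configuration) {
        // iterate all factories registered via META-INF/services and pick a compatible one
        for (ClusterClientFactory factory : ServiceLoader.load(ClusterClientFactory.class)) {
            if (factory.isCompatibleWith(configuration)) {
                return factory; // e.g. YarnClusterClientFactory for yarn-application
            }
        }
        throw new IllegalStateException("No ClusterClientFactory found for the given configuration.");
    }
}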
The next section will analyze clusterDescriptor.deployApplicationCluster.