Table & SQL API 是一种关系型 API,用户可以像操作 mysql 数据库表一样的操作数据,而不需要写java 代码完成 Flink Function,更不需要手工的优化 java 代码调优。
BatchTableEnvironment (离线批处理Table API)
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment tEnv = BatchTableEnvironment.create(env);
StreamTableEnvironment (实时流处理Table API)
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
一、flink-sql基本使用
1、导入flink-sql依赖
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.11</artifactId>
<version>1.9.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.11</artifactId>
<version>1.9.1</version>
</dependency>
2、Table Sql案例
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import static org.apache.flink.table.api.Expressions.$;
public class SQLWordCount {
public static void main(String[] args) throws Exception {
//env -> DataStream
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//将StreamExecutionEnvironment进行增强,才能写SQL
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
//spark,hadoop,flink,hive
//spark,flink,flink
//flink,flink
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
//将DataStream变成结构化数据
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] words = line.split(",");
for (String word : words) {
out.collect(Tuple2.of(word, 1));
}
}
});
tEnv.createTemporaryView("v_word_count", wordAndOne, $("word"), $("counts"));
Table table = tEnv.sqlQuery("SELECT word, sum(counts) counts FROM v_word_count GROUP BY word");
//table.printSchema();
//可更新的数据流
DataStream<Tuple2<Boolean, Row>> dataStream = tEnv.toRetractStream(table, Row.class);
dataStream.print();
env.execute("SQLWordCount");
}
}
3、Table API 案例
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import static org.apache.flink.table.api.Expressions.$;
public class TableWordCount {
public static void main(String[] args) throws Exception {
//env -> DataStream
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//将StreamExecutionEnvironment进行增强,才能写SQL
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
//spark,hadoop,flink,hive
//spark,flink,flink
//flink,flink
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
//将DataStream变成结构化数据
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] words = line.split(",");
for (String word : words) {
out.collect(Tuple2.of(word, 1));
}
}
});
Table table = tEnv.fromDataStream(wordAndOne, $("word"), $("one"));
//调用Table API(DSL)
Table res = table.groupBy($("word"))
.select($("word"), $("one").sum().as("counts"));
DataStream<Tuple2<Boolean, Row>> retractStream = tEnv.toRetractStream(res, Row.class);
retractStream.print();
env.execute("TableWordCount");
}
}
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import static org.apache.flink.table.api.Expressions.$;
public class AppendStreanDemo {
public static void main(String[] args) throws Exception {
//env -> DataStream
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//将StreamExecutionEnvironment进行增强,才能写SQL
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
//spark,hadoop,flink,hive
//spark,flink,flink
//flink,flink
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
//将DataStream变成结构化数据
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] words = line.split(",");
for (String word : words) {
out.collect(Tuple2.of(word, 1));
}
}
});
Table table = tEnv.fromDataStream(wordAndOne, $("word"), $("one"));
Table res = table.select($("word").upperCase(), $("one"));
DataStream<Row> rowDataStream = tEnv.toAppendStream(res, Row.class); //可追加的数据流
rowDataStream.print();
env.execute("TableWordCount");
}
}
4、按照EventTime划分滚动窗口聚合
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import static org.apache.flink.table.api.Expressions.$;
public class SqlTumblingEventTimeWindow {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//1000,u1,p1,5
//2000,u1,p1,5
//2000,u2,p1,3
//3000,u1,p1,5
//9999,u2,p1,3
DataStreamSource<String> socketTextStream = env.socketTextStream("localhost", 8888);
SingleOutputStreamOperator<Row> rowDataStream = socketTextStream.map(
new MapFunction<String, Row>() {
public Row map(String line) throws Exception {
String[] fields = line.split(",");
Long time = Long.parseLong(fields[0]);
String uid = fields[1];
String pid = fields[2];
Double money = Double.parseDouble(fields[3]);
return Row.of(time, uid, pid, money);
}
}).returns(Types.ROW(Types.LONG, Types.STRING, Types.STRING, Types.DOUBLE));
//提取数据中的EventTime并生成WaterMark
DataStream<Row> waterMarksRow = rowDataStream.assignTimestampsAndWatermarks(
new BoundedOutOfOrdernessTimestampExtractor<Row>(Time.seconds(0)) {
public long extractTimestamp(Row row) {
return (long) row.getField(0);
}
});
//将DataStream注册成表并指定schema信息
tableEnv.createTemporaryView("t_orders", waterMarksRow, $("time"), $("uid"), $("pid"), $("money"), $("aaa").rowtime());
//tableEnv.registerDataStream("t_orders", waterMarksRow, "time, uid, pid, money, rowtime.rowtime");
//使用SQL实现按照EventTime划分滚动窗口聚合
String sql = "SELECT uid, SUM(money) total_money, TUMBLE_START(aaa, INTERVAL '10' SECOND) as win_start, " +
"TUMBLE_END(aaa, INTERVAL '10' SECOND) as win_end " +
"FROM t_orders GROUP BY TUMBLE(aaa, INTERVAL '10' SECOND), uid";
Table table = tableEnv.sqlQuery(sql);
//使用TableEnv将table转成AppendStream
DataStream<Row> result = tableEnv.toAppendStream(table, Row.class);
result.print();
env.execute();
}
}
5、按照EventTime划分滑动窗口聚合
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import static org.apache.flink.table.api.Expressions.$;
public class SqlSlidingEventTimeWindows {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//1000,u1,p1,5
//2000,u1,p1,5
//2000,u2,p1,3
//3000,u1,p1,5
//9999,u2,p1,3
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
DataStream<Row> rowDataStream = lines.map(new MapFunction<String, Row>() {
public Row map(String line) throws Exception {
String[] fields = line.split(",");
Long time = Long.parseLong(fields[0]);
String uid = fields[1];
String pid = fields[2];
Double money = Double.parseDouble(fields[3]);
return Row.of(time, uid, pid, money);
}
}).returns(Types.ROW(Types.LONG, Types.STRING, Types.STRING, Types.DOUBLE));
//提取数据中的EventTime并生成WaterMark
DataStream<Row> waterMarksRow = rowDataStream.assignTimestampsAndWatermarks(
new BoundedOutOfOrdernessTimestampExtractor<Row>(Time.seconds(0)) {
public long extractTimestamp(Row row) {
return (long) row.getField(0);
}
});
//将DataStream注册成表并指定schema信息
//tableEnv.registerDataStream("t_orders", waterMarksRow, "time, uid, pid, money, rowtime.rowtime");
tableEnv.createTemporaryView("t_orders", waterMarksRow, $("time"), $("uid"), $("pid"), $("money"), $("rowtime").rowtime());
//使用SQL实现按照EventTime划分滑动窗口聚合
String sql = "SELECT uid, SUM(money) total_money, HOP_END(rowtime, INTERVAL '2' SECOND, INTERVAL '10' SECOND) as widEnd" +
" FROM t_orders GROUP BY HOP(rowtime, INTERVAL '2' SECOND, INTERVAL '10' SECOND), uid";
Table table = tableEnv.sqlQuery(sql);
//使用TableEnv将table转成AppendStream
DataStream<Row> result = tableEnv.toAppendStream(table, Row.class);
result.print();
env.execute();
}
}
6、自定义函数
package cn._51doit.flink.day11.stream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import static org.apache.flink.table.api.Expressions.$;
public class UDFSQLDemo {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//注册一个可以Cache的文件,通过网络发送给TaskManager
env.registerCachedFile("/Users/xing/Desktop/ip.txt", "ip-rules");
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//106.121.4.252
//42.57.88.186
DataStreamSource<String> socketTextStream = env.socketTextStream("localhost", 8888);
tableEnv.createTemporaryView("t_lines", socketTextStream, $("ip"));
//注册自定义函数,是一个UDF,输入一个IP地址,返回Row<省、市>
tableEnv.createTemporarySystemFunction("ip2Location", IpLocation.class);
//tableEnv.registerFunction("split", new Split("\\W+"));
Table table = tableEnv.sqlQuery(
"SELECT ip, ip2Location(ip) location FROM t_lines");
tableEnv.toAppendStream(table, Row.class).print();
env.execute();
}
}
7、从kafka里读数据
package cn._51doit.flink.day11.stream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class SQLKafkaTable {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
tEnv.executeSql(
"CREATE TABLE KafkaTable (\n" +
" `user_id` BIGINT,\n" +
" `item_id` BIGINT,\n" +
" `behavior` STRING,\n" +
" `ts` TIMESTAMP(3) METADATA FROM 'timestamp'\n" +
") WITH (\n" +
" 'connector' = 'kafka',\n" +
" 'topic' = 'kafka-csv',\n" +
" 'properties.bootstrap.servers' = 'node-1.51doit.cn:9092,node-2.51doit.cn:9092,node-3.51doit.cn:9092',\n" +
" 'properties.group.id' = 'testGroup',\n" +
" 'scan.startup.mode' = 'earliest-offset',\n" +
" 'format' = 'csv'\n" +
")"
);
Table table = tEnv.sqlQuery("SELECT * FROM KafkaTable");
DataStream<Row> appendStream = tEnv.toAppendStream(table, Row.class);
appendStream.print();
env.execute();
}
}