The Table & SQL API is a relational API: you can work with data much as you would with tables in a MySQL database, without hand-writing Java code for Flink Functions, let alone hand-tuning that Java code.

BatchTableEnvironment (Table API entry point for offline batch processing):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment tEnv = BatchTableEnvironment.create(env);

StreamTableEnvironment (Table API entry point for real-time stream processing):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
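
Note that the DataSet-based BatchTableEnvironment is only supported by the legacy planner, and it was removed entirely in Flink 1.14. On newer versions, batch programs use a unified TableEnvironment instead; a minimal sketch:

// Since Flink 1.14, batch Table programs are created through EnvironmentSettings
EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
TableEnvironment tEnv = TableEnvironment.create(settings);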

I. Basic usage of Flink SQL

1. Import the Flink SQL dependencies

The examples in this post rely on APIs introduced in Flink 1.11/1.12 (the org.apache.flink.table.api.bridge.java package, Expressions.$, createTemporarySystemFunction, Kafka metadata columns), so the 1.9.1 coordinates originally listed here will not compile. A matching pair of dependencies, assuming Flink 1.12.1 with the Blink planner, is:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-api-java-bridge_2.11</artifactId>
    <version>1.12.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner-blink_2.11</artifactId>
    <version>1.12.1</version>
</dependency>

2. Table SQL example

package cn._51doit.flink.day11.stream;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;

import static org.apache.flink.table.api.Expressions.$;

public class SQLWordCount {

    public static void main(String[] args) throws Exception {

        // env -> DataStream
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Wrap the StreamExecutionEnvironment in a table environment so that SQL can be used
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // Sample input lines:
        //   spark,hadoop,flink,hive
        //   spark,flink,flink
        //   flink,flink
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
        // Turn the DataStream into structured (word, 1) records
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                String[] words = line.split(",");
                for (String word : words) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        });

        tEnv.createTemporaryView("v_word_count", wordAndOne, $("word"), $("counts"));

        Table table = tEnv.sqlQuery("SELECT word, SUM(counts) counts FROM v_word_count GROUP BY word");

        //table.printSchema();
        // A retract stream: previously emitted results can be updated
        DataStream<Tuple2<Boolean, Row>> dataStream = tEnv.toRetractStream(table, Row.class);

        dataStream.print();

        env.execute("SQLWordCount");
    }
}
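
In the printed pairs the Boolean flag marks an insertion (true) or a retraction (false). Typing flink,flink into the socket, for example, produces output along the lines of:

(true,flink,1)
(false,flink,1)
(true,flink,2)

The old count is retracted before the updated count is emitted.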

3. Table API examples

package cn._51doit.flink.day11.stream;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;

import static org.apache.flink.table.api.Expressions.$;

public class TableWordCount {

    public static void main(String[] args) throws Exception {

        // env -> DataStream
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Wrap the StreamExecutionEnvironment in a table environment so that the Table API can be used
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // Sample input lines:
        //   spark,hadoop,flink,hive
        //   spark,flink,flink
        //   flink,flink
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
        // Turn the DataStream into structured (word, 1) records
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                String[] words = line.split(",");
                for (String word : words) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        });

        Table table = tEnv.fromDataStream(wordAndOne, $("word"), $("one"));

        // Use the Table API (a DSL) instead of SQL
        Table res = table.groupBy($("word"))
                .select($("word"), $("one").sum().as("counts"));

        DataStream<Tuple2<Boolean, Row>> retractStream = tEnv.toRetractStream(res, Row.class);

        retractStream.print();

        env.execute("TableWordCount");
    }
}
The next example only projects each incoming row, so its result is append-only and can be converted with toAppendStream; see the note after the code.

package cn._51doit.flink.day11.stream;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;

import static org.apache.flink.table.api.Expressions.$;

public class AppendStreamDemo {

    public static void main(String[] args) throws Exception {

        // env -> DataStream
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Wrap the StreamExecutionEnvironment in a table environment
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // Sample input lines:
        //   spark,hadoop,flink,hive
        //   spark,flink,flink
        //   flink,flink
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
        // Turn the DataStream into structured (word, 1) records
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                String[] words = line.split(",");
                for (String word : words) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        });

        Table table = tEnv.fromDataStream(wordAndOne, $("word"), $("one"));

        Table res = table.select($("word").upperCase(), $("one"));

        DataStream<Row> rowDataStream = tEnv.toAppendStream(res, Row.class); // append-only stream

        rowDataStream.print();

        env.execute("AppendStreamDemo");
    }
}
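
Unlike the two word-count examples, this query never updates a result it has already emitted, so toAppendStream is sufficient. Calling toAppendStream on the grouped aggregation above would fail at runtime with a TableException, because an aggregation has to retract and re-emit rows as counts change; that is exactly what toRetractStream supports.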

4. Tumbling-window aggregation on EventTime

package cn._51doit.flink.day11.stream;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import static org.apache.flink.table.api.Expressions.$;

public class SqlTumblingEventTimeWindow {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setParallelism(1);

        // Event time is the default time characteristic since Flink 1.12,
        // so the following line is no longer needed:
        //env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Sample input lines (timestamp,uid,pid,money):
        //   1000,u1,p1,5
        //   2000,u1,p1,5
        //   2000,u2,p1,3
        //   3000,u1,p1,5
        //   9999,u2,p1,3
        DataStreamSource<String> socketTextStream = env.socketTextStream("localhost", 8888);

        SingleOutputStreamOperator<Row> rowDataStream = socketTextStream.map(
                new MapFunction<String, Row>() {
                    @Override
                    public Row map(String line) throws Exception {
                        String[] fields = line.split(",");
                        Long time = Long.parseLong(fields[0]);
                        String uid = fields[1];
                        String pid = fields[2];
                        Double money = Double.parseDouble(fields[3]);
                        return Row.of(time, uid, pid, money);
                    }
                }).returns(Types.ROW(Types.LONG, Types.STRING, Types.STRING, Types.DOUBLE));

        // Extract the event time from the data and generate watermarks
        DataStream<Row> waterMarksRow = rowDataStream.assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<Row>(Time.seconds(0)) {
                    @Override
                    public long extractTimestamp(Row row) {
                        return (long) row.getField(0);
                    }
                });
        // Register the DataStream as a table and declare its schema;
        // rowtime() marks the extra column as the event-time attribute
        tableEnv.createTemporaryView("t_orders", waterMarksRow, $("time"), $("uid"), $("pid"), $("money"), $("rowtime").rowtime());
        //tableEnv.registerDataStream("t_orders", waterMarksRow, "time, uid, pid, money, rowtime.rowtime");
        // Tumbling-window aggregation on event time, expressed in SQL
        String sql = "SELECT uid, SUM(money) total_money, TUMBLE_START(rowtime, INTERVAL '10' SECOND) as win_start, " +
                "TUMBLE_END(rowtime, INTERVAL '10' SECOND) as win_end " +
                "FROM t_orders GROUP BY TUMBLE(rowtime, INTERVAL '10' SECOND), uid";
        Table table = tableEnv.sqlQuery(sql);
        // A window result is append-only, so it can be converted to an append stream
        DataStream<Row> result = tableEnv.toAppendStream(table, Row.class);
        result.print();
        env.execute();
    }
}
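
With the bounded-out-of-orderness extractor set to 0 seconds, all five sample rows (timestamps 1000 to 9999) fall into the tumbling window [0, 10000). The result is emitted only once the watermark passes the window end, e.g. after a further row like 10000,u1,p1,1 arrives; the output should then contain roughly u1,15.0 and u2,6.0 together with the window start and end times.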

5. Sliding-window aggregation on EventTime

package cn._51doit.flink.day11.stream;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import static org.apache.flink.table.api.Expressions.$;

public class SqlSlidingEventTimeWindows {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setParallelism(1);

        // Event time is the default time characteristic since Flink 1.12,
        // so the following line is no longer needed:
        //env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Sample input lines (timestamp,uid,pid,money):
        //   1000,u1,p1,5
        //   2000,u1,p1,5
        //   2000,u2,p1,3
        //   3000,u1,p1,5
        //   9999,u2,p1,3
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        DataStream<Row> rowDataStream = lines.map(new MapFunction<String, Row>() {
            @Override
            public Row map(String line) throws Exception {
                String[] fields = line.split(",");
                Long time = Long.parseLong(fields[0]);
                String uid = fields[1];
                String pid = fields[2];
                Double money = Double.parseDouble(fields[3]);
                return Row.of(time, uid, pid, money);
            }
        }).returns(Types.ROW(Types.LONG, Types.STRING, Types.STRING, Types.DOUBLE));

        // Extract the event time from the data and generate watermarks
        DataStream<Row> waterMarksRow = rowDataStream.assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<Row>(Time.seconds(0)) {
                    @Override
                    public long extractTimestamp(Row row) {
                        return (long) row.getField(0);
                    }
                });
        // Register the DataStream as a table and declare its schema
        //tableEnv.registerDataStream("t_orders", waterMarksRow, "time, uid, pid, money, rowtime.rowtime");
        tableEnv.createTemporaryView("t_orders", waterMarksRow, $("time"), $("uid"), $("pid"), $("money"), $("rowtime").rowtime());
        // Sliding-window aggregation on event time, expressed in SQL
        String sql = "SELECT uid, SUM(money) total_money, HOP_END(rowtime, INTERVAL '2' SECOND, INTERVAL '10' SECOND) as winEnd" +
                " FROM t_orders GROUP BY HOP(rowtime, INTERVAL '2' SECOND, INTERVAL '10' SECOND), uid";
        Table table = tableEnv.sqlQuery(sql);
        // Convert the window result into an append stream
        DataStream<Row> result = tableEnv.toAppendStream(table, Row.class);
        result.print();
        env.execute();
    }
}
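
In HOP(rowtime, INTERVAL '2' SECOND, INTERVAL '10' SECOND) the second argument is the slide and the third is the window size, so a new 10-second window starts every 2 seconds and each row contributes to 10 / 2 = 5 overlapping windows.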

6. User-defined functions

package cn._51doit.flink.day11.stream;

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import static org.apache.flink.table.api.Expressions.$;

public class UDFSQLDemo {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Register a cached file; Flink ships it to every TaskManager over the network
        env.registerCachedFile("/Users/xing/Desktop/ip.txt", "ip-rules");

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Sample input lines:
        //   106.121.4.252
        //   42.57.88.186
        DataStreamSource<String> socketTextStream = env.socketTextStream("localhost", 8888);

        tableEnv.createTemporaryView("t_lines", socketTextStream, $("ip"));

        // Register a user-defined scalar function (UDF): given an IP address,
        // it returns a Row<province, city>
        tableEnv.createTemporarySystemFunction("ip2Location", IpLocation.class);

        //tableEnv.registerFunction("split", new Split("\\W+"));

        Table table = tableEnv.sqlQuery(
                "SELECT ip, ip2Location(ip) location FROM t_lines");

        tableEnv.toAppendStream(table, Row.class).print();

        env.execute();
    }
}
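
The IpLocation class referenced above is not shown in the post. The following is a minimal sketch of what it could look like, assuming ip.txt contains one rule per line in the form startIpAsLong|endIpAsLong|province|city; the file format, the field positions, and the linear lookup are assumptions, not the original implementation:

package cn._51doit.flink.day11.stream;

import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.types.Row;

import java.io.File;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;

public class IpLocation extends ScalarFunction {

    // One parsed rule per line of ip.txt: [startIp, endIp] plus [province, city]
    private transient List<long[]> ranges;
    private transient List<String[]> locations;

    @Override
    public void open(FunctionContext context) throws Exception {
        // Fetch the file registered via env.registerCachedFile(..., "ip-rules")
        File rules = context.getCachedFile("ip-rules");
        ranges = new ArrayList<>();
        locations = new ArrayList<>();
        for (String line : Files.readAllLines(rules.toPath())) {
            String[] f = line.split("\\|"); // assumed format: start|end|province|city
            ranges.add(new long[]{Long.parseLong(f[0]), Long.parseLong(f[1])});
            locations.add(new String[]{f[2], f[3]});
        }
    }

    // Map an IP string to ROW<province, city>; null if no rule matches
    public @DataTypeHint("ROW<province STRING, city STRING>") Row eval(String ip) {
        long n = ipToLong(ip);
        for (int i = 0; i < ranges.size(); i++) {
            if (n >= ranges.get(i)[0] && n <= ranges.get(i)[1]) {
                return Row.of(locations.get(i)[0], locations.get(i)[1]);
            }
        }
        return null;
    }

    // Convert a dotted-quad IP string to its numeric value
    private static long ipToLong(String ip) {
        long n = 0L;
        for (String part : ip.split("\\.")) {
            n = (n << 8) | Long.parseLong(part);
        }
        return n;
    }
}

For a real rule file, sorting the ranges once in open() and using binary search in eval() would be the more sensible design than the linear scan shown here.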

7. Reading data from Kafka
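
Running this example also needs the Kafka connector and the CSV format on the classpath; assuming the same Flink version as in section 1, the additional dependencies would look like:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.12.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-csv</artifactId>
    <version>1.12.1</version>
</dependency>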

package cn._51doit.flink.day11.stream;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

public class SQLKafkaTable {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // DDL for a Kafka-backed table; ts is read from the Kafka record's metadata
        tEnv.executeSql(
                "CREATE TABLE KafkaTable (\n" +
                "  `user_id` BIGINT,\n" +
                "  `item_id` BIGINT,\n" +
                "  `behavior` STRING,\n" +
                "  `ts` TIMESTAMP(3) METADATA FROM 'timestamp'\n" +
                ") WITH (\n" +
                "  'connector' = 'kafka',\n" +
                "  'topic' = 'kafka-csv',\n" +
                "  'properties.bootstrap.servers' = 'node-1.51doit.cn:9092,node-2.51doit.cn:9092,node-3.51doit.cn:9092',\n" +
                "  'properties.group.id' = 'testGroup',\n" +
                "  'scan.startup.mode' = 'earliest-offset',\n" +
                "  'format' = 'csv'\n" +
                ")"
        );

        Table table = tEnv.sqlQuery("SELECT * FROM KafkaTable");

        DataStream<Row> appendStream = tEnv.toAppendStream(table, Row.class);

        appendStream.print();

        env.execute();
    }
}
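
With 'format' = 'csv', a Kafka message body such as 1001,2001,buy is parsed into the three physical columns user_id, item_id and behavior, while ts is populated from the Kafka record's own timestamp metadata rather than from the message body.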
