1、时间窗口简述
(1)基础概述
- 时间窗口:按照一定时间规则作为窗口统计
time-tumbling-window:时间滚动窗口
time-sliding-window:时间滑动窗口
session-window:会话窗口,即一个会话内的数量统计
(2)API使用流程
- WindowAPI使用场景:
有keyBy用window()api:Stream.keyBy().window()
没有keyBy用windowAll()api:Stream.windowAll()
- 注意:一个窗口的时间范围是左闭右开,即[start, end)
(3)参数相关
①窗口分配器Window Assigners:定义元素分配窗口机制
②窗口触发器trigger:控制窗口触发计算的机制
③窗口数据操作window function:对窗口内数据的计算操作
- 增量聚合函数:aggregate(agg函数,windowFunction(){})
- 窗口保存临时数据,每进入一个新数据,会与中间数据累加,生成新的中间数据。
AggregateFunction<IN, ACC, OUT>
IN:是输入类型;
ACC:是中间聚合状态类型;
OUT:是输出类型,是聚合统计当前窗口的数据。
- 全窗口函数:WindowFunction
- 先缓存该窗口所有元素,等窗口的全部数据收集起来后再触发条件计算
WindowFunction<IN, OUT, KEY, W extends Window>
IN:是输入类型
OUT:是输出类型
KEY:是分组类型
W:是窗口类型(例如TimeWindow)
2、滚动时间窗
- 滚动窗口:窗口大小固定 & 数据无重叠
案例:每5s统计最近5s的订单量
- 代码实战
(1)时间输出格式化工具类
package com.lihaiwei.text1.util;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Date;
/**
 * Helper for rendering {@link Date} values as human-readable timestamps.
 */
public class TimeUtil {

    /**
     * Shared formatter for the {@code yyyy-MM-dd HH:mm:ss} pattern.
     * {@link DateTimeFormatter} is immutable and thread-safe, so one instance is reused
     * instead of building a new one on every call.
     */
    private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Formats a date as {@code yyyy-MM-dd HH:mm:ss} in the JVM's default time zone.
     *
     * @param time the date to format; must not be null
     * @return the formatted timestamp
     * @throws NullPointerException if {@code time} is null
     */
    public static String format(Date time) {
        if (time == null) {
            // Fail with an explicit message rather than an anonymous NPE from time.toInstant().
            throw new NullPointerException("time must not be null");
        }
        // The default zone is looked up per call so a changed JVM default is honoured.
        return FORMATTER.format(time.toInstant().atZone(ZoneId.systemDefault()));
    }
}
(2)定义数据接口
package com.lihaiwei.text1.model;
import com.lihaiwei.text1.util.TimeUtil;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.Date;
/**
 * Order record used as the element type of the example streams.
 * Lombok generates the accessors plus an all-args and a no-args constructor.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class VideoOrder {
    private String tradeNo;
    private String title;
    private int money;
    private int userId;
    private Date createTime;

    /**
     * Renders all fields, with {@code createTime} formatted through {@link TimeUtil}.
     * A null {@code createTime} (e.g. on an instance created by the no-args constructor)
     * is rendered as {@code null} instead of throwing.
     */
    @Override
    public String toString() {
        // Guard the formatter call: toString() must stay usable on partially populated instances.
        String formattedTime = createTime == null ? "null" : TimeUtil.format(createTime);
        return "VideoOrder{" +
                "tradeNo='" + tradeNo + '\'' +
                ", title='" + title + '\'' +
                ", money=" + money +
                ", userId=" + userId +
                ", createTime=" + formattedTime +
                '}';
    }
}
(3)数据源随机生成
package com.lihaiwei.text1.source;
import com.lihaiwei.text1.model.VideoOrder;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import java.util.*;
/**
 * Parallel source that emits one randomly generated {@link VideoOrder} per loop iteration,
 * pausing one second before each, until {@link #cancel()} is called.
 */
public class VideoOrderSource extends RichParallelSourceFunction<VideoOrder> {

    /** Titles a generated order can carry; fixed at class-load time and read-only. */
    private static final List<String> TITLES = Collections.unmodifiableList(Arrays.asList(
            "spark课程",
            "oracle课程",
            "RabbitMQ消息队列",
            "Kafka课程",
            "hadoop课程",
            "Flink流式技术课程",
            "工业级微服务项目大课训练营",
            "Linux课程"));

    /**
     * Loop guard for {@link #run}; cleared by {@link #cancel()}.
     * Declared volatile so a write from the cancelling thread is visible to the running loop.
     */
    private volatile boolean running = true;

    private final Random random = new Random();

    /**
     * Called before {@code run}; a place to initialise connections. Here it only logs.
     *
     * @param parameters configuration passed in by the runtime (unused)
     * @throws Exception declared by the overridden method
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        System.out.println("-----open-----");
    }

    /**
     * Clean-up hook. Here it only logs.
     *
     * @throws Exception declared by the overridden method
     */
    @Override
    public void close() throws Exception {
        System.out.println("-----close-----");
    }

    /**
     * Produces the data: sleeps one second, builds an order with a random UUID trade number,
     * a user id in [0, 10), an amount in [0, 100), a random title and the current time,
     * then emits it. Repeats until cancelled.
     *
     * @param ctx context used to emit the generated orders
     * @throws Exception if the sleep is interrupted
     */
    @Override
    public void run(SourceContext<VideoOrder> ctx) throws Exception {
        while (running) {
            Thread.sleep(1000);
            String id = UUID.randomUUID().toString();
            int userId = random.nextInt(10);
            int money = random.nextInt(100);
            String title = TITLES.get(random.nextInt(TITLES.size()));
            ctx.collect(new VideoOrder(id, title, money, userId, new Date()));
        }
    }

    /**
     * Requests termination of the emit loop.
     */
    @Override
    public void cancel() {
        running = false;
    }
}
(4)main方法
package com.lihaiwei.text1.app;
import com.lihaiwei.text1.model.VideoOrder;
import com.lihaiwei.text1.sink.MysqlSink;
import com.lihaiwei.text1.source.VideoOrderSource;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
/**
 * Tumbling processing-time window example: every 5 seconds, sums the {@code money} field
 * of the orders received in the last 5 seconds, grouped by title.
 */
public class Flink01App {
    public static void main(String[] args) throws Exception {
        // 1. Build the execution environment (job entry point, holds global settings).
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. Attach the random order source and print the raw records.
        DataStreamSource<VideoOrder> orderDS = env.addSource(new VideoOrderSource());
        orderDS.print("提交前");

        // 3.1 Group by title.
        KeyedStream<VideoOrder, String> keybyDS = orderDS.keyBy(new KeySelector<VideoOrder, String>() {
            @Override
            public String getKey(VideoOrder value) throws Exception {
                return value.getTitle();
            }
        });

        // 3.2 Tumbling window of 5 seconds, matching the stated case (was 10 seconds, which
        //     contradicted both the case description and the original comment).
        DataStream<VideoOrder> sumDS = keybyDS
                .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                .sum("money");

        // 4. Print the aggregated records. A custom sink (e.g. MysqlSink) could be attached
        //    to a stream here instead of printing.
        sumDS.print("提交后");

        // 5. A DataStream job only runs once execute() is called; the argument is the job name.
        env.execute("flink");
    }
}
- 运行结果
3、滑动时间窗
- 滑动窗口:窗口大小固定 & 数据有重叠
案例:每5s统计最近20s的数据
- 代码实战
package com.lihaiwei.text1.app;
import com.lihaiwei.text1.model.VideoOrder;
import com.lihaiwei.text1.sink.MysqlSink;
import com.lihaiwei.text1.source.VideoOrderSource;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
/**
 * Sliding processing-time window example: every 5 seconds, sums the {@code money} field
 * of the orders received in the last 20 seconds, grouped by title.
 */
public class flink06time {
    public static void main(String[] args) throws Exception {
        // 1. Build the execution environment (job entry point, holds global settings).
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. Attach the random order source and print the raw records.
        DataStreamSource<VideoOrder> orderDS = env.addSource(new VideoOrderSource());
        orderDS.print("提交前");

        // 3.1 Group by title.
        KeyedStream<VideoOrder, String> keybyDS = orderDS.keyBy(new KeySelector<VideoOrder, String>() {
            @Override
            public String getKey(VideoOrder value) throws Exception {
                return value.getTitle();
            }
        });

        // 3.2 Sliding window: size 20 seconds, slide 5 seconds, matching the stated case
        //     "every 5s over the last 20s" (the size was 10 seconds before).
        DataStream<VideoOrder> sumDS = keybyDS
                .window(SlidingProcessingTimeWindows.of(Time.seconds(20), Time.seconds(5)))
                .sum("money");

        // 4. Print the aggregated records. A custom sink (e.g. MysqlSink) could be attached
        //    to a stream here instead of printing.
        sumDS.print("提交后");

        // 5. A DataStream job only runs once execute() is called; the argument is the job name.
        env.execute("flink");
    }
}
- 运行结果
提交前> VideoOrder{tradeNo='ca3401be-6f7e-42d3-81e7-d6ab657ad892', title='工业级微服务项目大课训练营', money=72, userId=7, createTime=2021-11-11 11:03:35}
提交前> VideoOrder{tradeNo='165d6506-a80f-4a2b-910e-2c33985bab5b', title='工业级微服务项目大课训练营', money=26, userId=8, createTime=2021-11-11 11:03:36}
提交前> VideoOrder{tradeNo='53ccaed6-916b-4035-ba84-562064ecfa5f', title='Flink流式技术课程', money=38, userId=8, createTime=2021-11-11 11:03:37}
提交前> VideoOrder{tradeNo='dd6f68e8-5afd-4151-a23b-1083c8c25881', title='工业级微服务项目大课训练营', money=98, userId=7, createTime=2021-11-11 11:03:38}
提交前> VideoOrder{tradeNo='a523a9c2-168b-4e6d-959f-6c74b95e5f41', title='Flink流式技术课程', money=63, userId=6, createTime=2021-11-11 11:03:39}
提交后> VideoOrder{tradeNo='cbce3699-52e3-42cd-8d1d-8feabd936335', title='hadoop课程', money=47, userId=8, createTime=2021-11-11 11:03:34}
提交后> VideoOrder{tradeNo='ca3401be-6f7e-42d3-81e7-d6ab657ad892', title='工业级微服务项目大课训练营', money=196, userId=7, createTime=2021-11-11 11:03:35}
提交后> VideoOrder{tradeNo='53ccaed6-916b-4035-ba84-562064ecfa5f', title='Flink流式技术课程', money=101, userId=8, createTime=2021-11-11 11:03:37}
提交后> VideoOrder{tradeNo='edee6f2b-22b1-4270-86eb-34080d248974', title='RabbitMQ消息队列', money=81, userId=9, createTime=2021-11-11 11:03:33}
提交前> VideoOrder{tradeNo='c727a351-bcd0-4c6c-a5be-9b8167e733e9', title='Linux课程', money=95, userId=5, createTime=2021-11-11 11:03:40}
提交前> VideoOrder{tradeNo='7bbaf831-ad47-4644-975f-1f6a6c68d1e9', title='spark课程', money=26, userId=3, createTime=2021-11-11 11:03:41}
提交前> VideoOrder{tradeNo='c4bce9f3-07dc-4f48-a93d-cbe92c6e859d', title='hadoop课程', money=71, userId=0, createTime=2021-11-11 11:03:42}
提交前> VideoOrder{tradeNo='db3a6aff-78fb-401e-a84a-872d4c499d18', title='oracle课程', money=79, userId=0, createTime=2021-11-11 11:03:43}
提交前> VideoOrder{tradeNo='34819919-61f3-4c06-96c9-01cdd55f4829', title='Linux课程', money=44, userId=6, createTime=2021-11-11 11:03:44}
提交后> VideoOrder{tradeNo='cbce3699-52e3-42cd-8d1d-8feabd936335', title='hadoop课程', money=118, userId=8, createTime=2021-11-11 11:03:34}
提交后> VideoOrder{tradeNo='53ccaed6-916b-4035-ba84-562064ecfa5f', title='Flink流式技术课程', money=101, userId=8, createTime=2021-11-11 11:03:37}
提交后> VideoOrder{tradeNo='db3a6aff-78fb-401e-a84a-872d4c499d18', title='oracle课程', money=79, userId=0, createTime=2021-11-11 11:03:43}
提交后> VideoOrder{tradeNo='edee6f2b-22b1-4270-86eb-34080d248974', title='RabbitMQ消息队列', money=81, userId=9, createTime=2021-11-11 11:03:33}
提交后> VideoOrder{tradeNo='ca3401be-6f7e-42d3-81e7-d6ab657ad892', title='工业级微服务项目大课训练营', money=196, userId=7, createTime=2021-11-11 11:03:35}
提交后> VideoOrder{tradeNo='c727a351-bcd0-4c6c-a5be-9b8167e733e9', title='Linux课程', money=139, userId=5, createTime=2021-11-11 11:03:40}
提交后> VideoOrder{tradeNo='7bbaf831-ad47-4644-975f-1f6a6c68d1e9', title='spark课程', money=26, userId=3, createTime=2021-11-11 11:03:41}
提交前> VideoOrder{tradeNo='cd90defc-5e6a-4f07-9afc-209ac135dcbb', title='RabbitMQ消息队列', money=27, userId=6, createTime=2021-11-11 11:03:45}
提交前> VideoOrder{tradeNo='19cd0950-d99a-4e0b-895c-ee107cb57e51', title='Linux课程', money=71, userId=6, createTime=2021-11-11 11:03:46}
提交前> VideoOrder{tradeNo='3e35ac36-83a7-4faf-8edd-aa60ed964022', title='Linux课程', money=20, userId=4, createTime=2021-11-11 11:03:47}
提交前> VideoOrder{tradeNo='2ccf8d9e-5b93-44c0-a2ae-4b1359e07efc', title='oracle课程', money=65, userId=6, createTime=2021-11-11 11:03:48}
提交前> VideoOrder{tradeNo='61172aa8-30e4-46f2-a1d3-64810a38c8ad', title='Linux课程', money=92, userId=0, createTime=2021-11-11 11:03:49}
提交后> VideoOrder{tradeNo='cbce3699-52e3-42cd-8d1d-8feabd936335', title='hadoop课程', money=118, userId=8, createTime=2021-11-11 11:03:34}
提交后> VideoOrder{tradeNo='edee6f2b-22b1-4270-86eb-34080d248974', title='RabbitMQ消息队列', money=108, userId=9, createTime=2021-11-11 11:03:33}
提交后> VideoOrder{tradeNo='db3a6aff-78fb-401e-a84a-872d4c499d18', title='oracle课程', money=144, userId=0, createTime=2021-11-11 11:03:43}
提交后> VideoOrder{tradeNo='53ccaed6-916b-4035-ba84-562064ecfa5f', title='Flink流式技术课程', money=101, userId=8, createTime=2021-11-11 11:03:37}
提交后> VideoOrder{tradeNo='c727a351-bcd0-4c6c-a5be-9b8167e733e9', title='Linux课程', money=322, userId=5, createTime=2021-11-11 11:03:40}
提交后> VideoOrder{tradeNo='7bbaf831-ad47-4644-975f-1f6a6c68d1e9', title='spark课程', money=26, userId=3, createTime=2021-11-11 11:03:41}
提交后> VideoOrder{tradeNo='ca3401be-6f7e-42d3-81e7-d6ab657ad892', title='工业级微服务项目大课训练营', money=196, userId=7, createTime=2021-11-11 11:03:35}
提交前> VideoOrder{tradeNo='1359299b-f20c-48b1-a8b1-a1c8d8f7f820', title='Flink流式技术课程', money=61, userId=3, createTime=2021-11-11 11:03:50}
提交前> VideoOrder{tradeNo='cb13ad34-15b3-4a87-8a1a-029068f5edec', title='工业级微服务项目大课训练营', money=74, userId=8, createTime=2021-11-11 11:03:51}
提交前> VideoOrder{tradeNo='53473645-780e-4139-84b7-86b9905ee825', title='Linux课程', money=42, userId=9, createTime=2021-11-11 11:03:52}
提交前> VideoOrder{tradeNo='262b2e53-727a-4638-bd94-e31dee189f25', title='hadoop课程', money=16, userId=3, createTime=2021-11-11 11:03:53}
提交前> VideoOrder{tradeNo='5de85dbc-adff-45c1-9942-fa509618133a', title='hadoop课程', money=13, userId=7, createTime=2021-11-11 11:03:54}
提交后> VideoOrder{tradeNo='cbce3699-52e3-42cd-8d1d-8feabd936335', title='hadoop课程', money=134, userId=8, createTime=2021-11-11 11:03:34}
提交后> VideoOrder{tradeNo='db3a6aff-78fb-401e-a84a-872d4c499d18', title='oracle课程', money=144, userId=0, createTime=2021-11-11 11:03:43}
提交后> VideoOrder{tradeNo='c727a351-bcd0-4c6c-a5be-9b8167e733e9', title='Linux课程', money=364, userId=5, createTime=2021-11-11 11:03:40}
提交后> VideoOrder{tradeNo='53ccaed6-916b-4035-ba84-562064ecfa5f', title='Flink流式技术课程', money=162, userId=8, createTime=2021-11-11 11:03:37}
提交后> VideoOrder{tradeNo='edee6f2b-22b1-4270-86eb-34080d248974', title='RabbitMQ消息队列', money=108, userId=9, createTime=2021-11-11 11:03:33}
提交后> VideoOrder{tradeNo='ca3401be-6f7e-42d3-81e7-d6ab657ad892', title='工业级微服务项目大课训练营', money=270, userId=7, createTime=2021-11-11 11:03:35}
提交后> VideoOrder{tradeNo='7bbaf831-ad47-4644-975f-1f6a6c68d1e9', title='spark课程', money=26, userId=3, createTime=2021-11-11 11:03:41}
提交前> VideoOrder{tradeNo='a91f4b97-be57-4e4e-ab9c-1bb4ec41ec29', title='Kafka课程', money=18, userId=5, createTime=2021-11-11 11:03:55}
提交前> VideoOrder{tradeNo='03cd4121-6f6b-4f5a-bc89-1aa6047b1af7', title='Flink流式技术课程', money=60, userId=5, createTime=2021-11-11 11:03:56}
提交前> VideoOrder{tradeNo='cffe441f-9f0c-43b9-9032-1982715b5f9e', title='Flink流式技术课程', money=22, userId=3, createTime=2021-11-11 11:03:57}
提交前> VideoOrder{tradeNo='efe83d20-fb73-4ee2-9eee-6277c39ca83e', title='spark课程', money=73, userId=6, createTime=2021-11-11 11:03:58}
提交前> VideoOrder{tradeNo='cded22d2-ed63-4f5b-8626-40a77b722a01', title='spark课程', money=40, userId=9, createTime=2021-11-11 11:03:59}
提交后> VideoOrder{tradeNo='edee6f2b-22b1-4270-86eb-34080d248974', title='RabbitMQ消息队列', money=108, userId=9, createTime=2021-11-11 11:03:33}
提交后> VideoOrder{tradeNo='db3a6aff-78fb-401e-a84a-872d4c499d18', title='oracle课程', money=144, userId=0, createTime=2021-11-11 11:03:43}
提交后> VideoOrder{tradeNo='53ccaed6-916b-4035-ba84-562064ecfa5f', title='Flink流式技术课程', money=244, userId=8, createTime=2021-11-11 11:03:37}
提交后> VideoOrder{tradeNo='a91f4b97-be57-4e4e-ab9c-1bb4ec41ec29', title='Kafka课程', money=18, userId=5, createTime=2021-11-11 11:03:55}
提交后> VideoOrder{tradeNo='c727a351-bcd0-4c6c-a5be-9b8167e733e9', title='Linux课程', money=364, userId=5, createTime=2021-11-11 11:03:40}
提交后> VideoOrder{tradeNo='ca3401be-6f7e-42d3-81e7-d6ab657ad892', title='工业级微服务项目大课训练营', money=270, userId=7, createTime=2021-11-11 11:03:35}
提交后> VideoOrder{tradeNo='7bbaf831-ad47-4644-975f-1f6a6c68d1e9', title='spark课程', money=99, userId=3, createTime=2021-11-11 11:03:41}
提交后> VideoOrder{tradeNo='cbce3699-52e3-42cd-8d1d-8feabd936335', title='hadoop课程', money=147, userId=8, createTime=2021-11-11 11:03:34}
提交前> VideoOrder{tradeNo='0e045e51-c7f7-4f83-8382-f3fe0b2ec7ae', title='RabbitMQ消息队列', money=67, userId=8, createTime=2021-11-11 11:04:00}
提交前> VideoOrder{tradeNo='14030a07-d148-4adb-b9c4-f6a268b8c023', title='oracle课程', money=11, userId=4, createTime=2021-11-11 11:04:01}
提交前> VideoOrder{tradeNo='356e2368-bade-459c-bf89-911a45a1d62c', title='工业级微服务项目大课训练营', money=69, userId=2, createTime=2021-11-11 11:04:02}
提交前> VideoOrder{tradeNo='1b389a92-5088-457b-8f89-be63223a1c39', title='Linux课程', money=21, userId=2, createTime=2021-11-11 11:04:03}
提交前> VideoOrder{tradeNo='51bc4bd1-d42d-4cd6-823c-6ef75f142a0d', title='hadoop课程', money=43, userId=9, createTime=2021-11-11 11:04:04}
提交后> VideoOrder{tradeNo='c4bce9f3-07dc-4f48-a93d-cbe92c6e859d', title='hadoop课程', money=143, userId=0, createTime=2021-11-11 11:03:42}
提交后> VideoOrder{tradeNo='a91f4b97-be57-4e4e-ab9c-1bb4ec41ec29', title='Kafka课程', money=18, userId=5, createTime=2021-11-11 11:03:55}
提交后> VideoOrder{tradeNo='c727a351-bcd0-4c6c-a5be-9b8167e733e9', title='Linux课程', money=385, userId=5, createTime=2021-11-11 11:03:40}
提交后> VideoOrder{tradeNo='cd90defc-5e6a-4f07-9afc-209ac135dcbb', title='RabbitMQ消息队列', money=94, userId=6, createTime=2021-11-11 11:03:45}
提交后> VideoOrder{tradeNo='53ccaed6-916b-4035-ba84-562064ecfa5f', title='Flink流式技术课程', money=244, userId=8, createTime=2021-11-11 11:03:37}
提交后> VideoOrder{tradeNo='7bbaf831-ad47-4644-975f-1f6a6c68d1e9', title='spark课程', money=139, userId=3, createTime=2021-11-11 11:03:41}
提交后> VideoOrder{tradeNo='db3a6aff-78fb-401e-a84a-872d4c499d18', title='oracle课程', money=155, userId=0, createTime=2021-11-11 11:03:43}
提交后> VideoOrder{tradeNo='ca3401be-6f7e-42d3-81e7-d6ab657ad892', title='工业级微服务项目大课训练营', money=339, userId=7, createTime=2021-11-11 11:03:35}
提交前> VideoOrder{tradeNo='9168bd0f-f3f1-48b1-8582-abd699bc0e30', title='oracle课程', money=22, userId=3, createTime=2021-11-11 11:04:05}
提交前> VideoOrder{tradeNo='8b9726da-019f-4de0-b536-e4db8b52188c', title='spark课程', money=27, userId=1, createTime=2021-11-11 11:04:06}
提交前> VideoOrder{tradeNo='d0185a6c-afb5-45a1-a52f-27d207129948', title='spark课程', money=81, userId=5, createTime=2021-11-11 11:04:07}
提交前> VideoOrder{tradeNo='b9ffb5b7-6295-4d04-a745-23075aaf233c', title='Kafka课程', money=13, userId=5, createTime=2021-11-11 11:04:08}
提交前> VideoOrder{tradeNo='4f3f03a8-1683-4ea8-92bc-74a8ca3bd5c3', title='Kafka课程', money=80, userId=9, createTime=2021-11-11 11:04:09}
提交后> VideoOrder{tradeNo='1359299b-f20c-48b1-a8b1-a1c8d8f7f820', title='Flink流式技术课程', money=143, userId=3, createTime=2021-11-11 11:03:50}
提交后> VideoOrder{tradeNo='cd90defc-5e6a-4f07-9afc-209ac135dcbb', title='RabbitMQ消息队列', money=94, userId=6, createTime=2021-11-11 11:03:45}
提交后> VideoOrder{tradeNo='c727a351-bcd0-4c6c-a5be-9b8167e733e9', title='Linux课程', money=385, userId=5, createTime=2021-11-11 11:03:40}
提交后> VideoOrder{tradeNo='a91f4b97-be57-4e4e-ab9c-1bb4ec41ec29', title='Kafka课程', money=111, userId=5, createTime=2021-11-11 11:03:55}
提交后> VideoOrder{tradeNo='db3a6aff-78fb-401e-a84a-872d4c499d18', title='oracle课程', money=177, userId=0, createTime=2021-11-11 11:03:43}
提交后> VideoOrder{tradeNo='cb13ad34-15b3-4a87-8a1a-029068f5edec', title='工业级微服务项目大课训练营', money=143, userId=8, createTime=2021-11-11 11:03:51}
提交后> VideoOrder{tradeNo='7bbaf831-ad47-4644-975f-1f6a6c68d1e9', title='spark课程', money=247, userId=3, createTime=2021-11-11 11:03:41}
提交后> VideoOrder{tradeNo='c4bce9f3-07dc-4f48-a93d-cbe92c6e859d', title='hadoop课程', money=143, userId=0, createTime=2021-11-11 11:03:42}
4、数量统计窗口
- 应用场景:基于数量的滚动窗口、滑动计数窗口
同一个key内的数据每累计满5条就触发一次统计:countWindow(5)
同一个key内每到达2条数据,就统计最近5条数据的值:countWindow(5, 2)
5、增量聚合
- 应用场景:定义了要对窗口中收集的数据做的计算操作
AggregateFunction<IN, ACC, OUT>
IN:是输入类型;
ACC:是中间聚合状态类型;
OUT:是输出类型,是聚合统计当前窗口的数据。
- 代码实战
package com.lihaiwei.text1.app;
import com.lihaiwei.text1.model.VideoOrder;
import com.lihaiwei.text1.sink.MysqlSink;
import com.lihaiwei.text1.source.VideoOrderSource;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
/**
 * Incremental aggregation example: in 5-second tumbling processing-time windows, sums the
 * {@code money} field per title with an {@link AggregateFunction} whose accumulator and result
 * are both {@link VideoOrder} instances.
 */
public class flink06time {
    public static void main(String[] args) throws Exception {
        // 1. Build the execution environment (job entry point, holds global settings).
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. Attach the random order source and print the raw records.
        DataStreamSource<VideoOrder> orderDS = env.addSource(new VideoOrderSource());
        orderDS.print("提交前");

        // 3.1 Group by title.
        KeyedStream<VideoOrder, String> keybyDS = orderDS.keyBy(new KeySelector<VideoOrder, String>() {
            @Override
            public String getKey(VideoOrder value) throws Exception {
                return value.getTitle();
            }
        });

        // 3.2 Aggregate incrementally inside a 5-second tumbling window.
        SingleOutputStreamOperator<VideoOrder> aggregate = keybyDS
                .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                .aggregate(new AggregateFunction<VideoOrder, VideoOrder, VideoOrder>() {
                    /** Starts from an empty order: money is 0, title and createTime are null. */
                    @Override
                    public VideoOrder createAccumulator() {
                        return new VideoOrder();
                    }

                    /** Folds one incoming order into the running accumulator. */
                    @Override
                    public VideoOrder add(VideoOrder value, VideoOrder accumulator) {
                        // Running total.
                        accumulator.setMoney(value.getMoney() + accumulator.getMoney());
                        // Remember the grouping field from the first element.
                        if (accumulator.getTitle() == null) {
                            accumulator.setTitle(value.getTitle());
                        }
                        // Keep the first element's creation time so the emitted result carries
                        // a non-null timestamp when it is printed.
                        if (accumulator.getCreateTime() == null) {
                            accumulator.setCreateTime(value.getCreateTime());
                        }
                        return accumulator;
                    }

                    /** The accumulator itself is the window result. */
                    @Override
                    public VideoOrder getResult(VideoOrder accumulator) {
                        return accumulator;
                    }

                    /**
                     * Combines two partial accumulators. Title and creation time are taken from
                     * {@code a}, falling back to {@code b}, so the merged result keeps a timestamp
                     * (it was dropped before).
                     */
                    @Override
                    public VideoOrder merge(VideoOrder a, VideoOrder b) {
                        VideoOrder merged = new VideoOrder();
                        merged.setMoney(a.getMoney() + b.getMoney());
                        merged.setTitle(a.getTitle() != null ? a.getTitle() : b.getTitle());
                        merged.setCreateTime(a.getCreateTime() != null ? a.getCreateTime() : b.getCreateTime());
                        return merged;
                    }
                });

        // 4. Print the aggregated records. A custom sink (e.g. MysqlSink) could be attached
        //    to a stream here instead of printing.
        aggregate.print("提交后");

        // 5. A DataStream job only runs once execute() is called; the argument is the job name.
        env.execute("flink");
    }
}
- 运行结果
6、全窗口聚合
- 应用场景:先缓存该窗口所有元素,等窗口的全部数据收集起来后再触发条件计算
WindowFunction<IN, OUT, KEY, W extends Window>
IN:是输入类型
OUT:是输出类型
KEY:是分组类型
W:是窗口类型(例如TimeWindow)
ProcessWindowFunction:可以获取到数据的上下文,包括窗口信息
ProcessWindowFunction<IN, OUT, KEY, W extends Window>
- 代码实战
package com.lihaiwei.text1.app;
import com.lihaiwei.text1.model.VideoOrder;
import com.lihaiwei.text1.sink.MysqlSink;
import com.lihaiwei.text1.source.VideoOrderSource;
import org.apache.commons.collections.IteratorUtils;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import org.apache.flink.util.IterableIterator;
import org.apache.flink.util.IterableUtils;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Full-window aggregation example: in 10-second tumbling processing-time windows, a
 * {@link WindowFunction} receives all buffered orders of one title and emits a single order
 * carrying their summed {@code money}.
 */
public class flink06time {
    public static void main(String[] args) throws Exception {
        // 1. Build the execution environment (job entry point, holds global settings).
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. Attach the random order source and print the raw records.
        DataStreamSource<VideoOrder> orderDS = env.addSource(new VideoOrderSource());
        orderDS.print("提交前");

        // 3.1 Group by title.
        KeyedStream<VideoOrder, String> keybyDS = orderDS.keyBy(new KeySelector<VideoOrder, String>() {
            @Override
            public String getKey(VideoOrder value) throws Exception {
                return value.getTitle();
            }
        });

        // 3.2 Full-window computation over a 10-second tumbling window.
        SingleOutputStreamOperator<VideoOrder> aggregate = keybyDS
                .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
                .apply(new WindowFunction<VideoOrder, VideoOrder, String, TimeWindow>() {
                    /**
                     * Sums {@code money} over every order in the window and emits one result
                     * whose title and creation time are copied from the first order.
                     *
                     * @param key    the grouping key of this window
                     * @param window the window being evaluated
                     * @param input  all orders collected for this key and window
                     * @param out    collector that receives the single aggregated order
                     */
                    @Override
                    public void apply(String key, TimeWindow window, Iterable<VideoOrder> input, Collector<VideoOrder> out) throws Exception {
                        // Single pass over the buffered elements: no raw-typed list copy and
                        // no boxing of the running total.
                        int total = 0;
                        VideoOrder first = null;
                        for (VideoOrder order : input) {
                            if (first == null) {
                                first = order;
                            }
                            total += order.getMoney();
                        }
                        if (first == null) {
                            // Nothing buffered: emit nothing rather than fail on a missing first element.
                            return;
                        }

                        VideoOrder result = new VideoOrder();
                        result.setMoney(total);
                        result.setTitle(first.getTitle());
                        result.setCreateTime(first.getCreateTime());
                        out.collect(result);
                    }
                });

        // 4. Print the aggregated records. A custom sink (e.g. MysqlSink) could be attached
        //    to a stream here instead of printing.
        aggregate.print("提交后");

        // 5. A DataStream job only runs once execute() is called; the argument is the job name.
        env.execute("flink");
    }
}
- 运行结果