1.数据生产
使用java代码往一个文件中写入数据
package com.mobile;
import java.io.*;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* @author kaiya
* @Desc 数据生产
* @date 2020/5/6 20:33
*/
public class Producer {
    // Record layout: caller,callee,buildTime,duration
    // (calling number, called number, call setup time, call duration in seconds)

    // The 20 known phone numbers; product() draws random entries from this list.
    List<String> phoneNumList = new ArrayList<>();
    // phone number -> owner name (not used by product(); kept for downstream lookups)
    Map<String, String> phoneNameMap = new HashMap<>();
    // Date range (yyyy-MM-dd) from which call setup times are drawn.
    private String startTime = "2020-01-01";
    private String endTime = "2020-12-31";

    /**
     * Populates the phone-number list and the number-to-name map with fixed test data.
     * Must be called once before {@link #product()}.
     */
    public void initPhone() {
        // Phone numbers
        phoneNumList.add("17078388295");
        phoneNumList.add("13980337439");
        phoneNumList.add("14575535933");
        phoneNumList.add("19902496992");
        phoneNumList.add("18549641558");
        phoneNumList.add("17005930322");
        phoneNumList.add("18468618874");
        phoneNumList.add("18576581848");
        phoneNumList.add("15978226424");
        phoneNumList.add("15542823911");
        phoneNumList.add("17526304161");
        phoneNumList.add("15422018558");
        phoneNumList.add("17269452013");
        phoneNumList.add("17764278604");
        phoneNumList.add("15711910344");
        phoneNumList.add("15714728273");
        phoneNumList.add("16061028454");
        phoneNumList.add("16264433631");
        phoneNumList.add("17601615878");
        phoneNumList.add("15897468949");
        // Phone number -> name
        phoneNameMap.put("17078388295", "施耐庵");
        phoneNameMap.put("13980337439", "李世民");
        phoneNameMap.put("14575535933", "程咬金");
        phoneNameMap.put("19902496992", "猪八戒");
        phoneNameMap.put("18549641558", "孙悟空");
        phoneNameMap.put("17005930322", "唐三藏");
        phoneNameMap.put("18468618874", "沙僧");
        phoneNameMap.put("18576581848", "沙悟净");
        phoneNameMap.put("15978226424", "猪悟能");
        phoneNameMap.put("15542823911", "观世音");
        phoneNameMap.put("17526304161", "太白金星");
        phoneNameMap.put("15422018558", "赤脚大仙");
        phoneNameMap.put("17269452013", "二郎神");
        phoneNameMap.put("17764278604", "哮天犬");
        phoneNameMap.put("15711910344", "嫦娥");
        phoneNameMap.put("15714728273", "玉皇大帝");
        phoneNameMap.put("16061028454", "王母娘娘");
        phoneNameMap.put("16264433631", "如来");
        phoneNameMap.put("17601615878", "白骨精");
        phoneNameMap.put("15897468949", "牛魔王");
    }

    /**
     * Produces one random call record.
     * Format: caller,callee,buildTime,duration — e.g.
     * "17078388295,13980337439,2020-03-01 12:00:00,0042".
     *
     * @return a single CSV record; caller and callee are guaranteed to differ
     */
    public String product() {
        // Caller: a random entry from the number list
        // (Math.random() * size yields an index in [0, size)).
        int callerIndex = (int) (Math.random() * phoneNumList.size());
        String caller = phoneNumList.get(callerIndex);
        // Callee: keep drawing until we get a number different from the caller.
        String callee;
        do {
            int calleeIndex = (int) (Math.random() * phoneNumList.size());
            callee = phoneNumList.get(calleeIndex);
        } while (callee.equals(caller));
        // Call setup time: random instant inside [startTime, endTime).
        String buildTime = randomBuildTime(startTime, endTime);
        // Duration: up to 30 minutes in seconds, zero-padded to 4 digits.
        // Note: the cast covers the whole product (the original cast bound to the
        // literal 30 only, passing a double to format()).
        DecimalFormat df = new DecimalFormat("0000");
        String duration = df.format((int) (30 * 60 * Math.random()));
        // Assemble the final CSV record.
        StringBuilder sb = new StringBuilder();
        sb.append(caller).append(",").append(callee).append(",")
                .append(buildTime).append(",").append(duration);
        return sb.toString();
    }

    /**
     * Returns a random timestamp between the two dates, formatted as
     * "yyyy-MM-dd HH:mm:ss".
     *
     * @param startTime lower bound, yyyy-MM-dd (inclusive)
     * @param endTime   upper bound, yyyy-MM-dd (must be strictly after startTime)
     * @return the formatted timestamp, or null if the range is invalid or unparsable
     */
    private String randomBuildTime(String startTime, String endTime) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            Date startDate = sdf.parse(startTime);
            Date endDate = sdf.parse(endTime);
            // The end must be strictly after the start.
            if (startDate.getTime() >= endDate.getTime()) {
                return null;
            }
            // Random instant: start + random fraction of (end - start).
            long timeMill = (long) ((endDate.getTime() - startDate.getTime())
                    * Math.random() + startDate.getTime());
            return sdf2.format(new Date(timeMill));
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Appends one record to the given file every 500 ms until interrupted.
     * Records are UTF-8 encoded, one per line; the writer is flushed after each
     * record so a tailing consumer (e.g. flume) sees data immediately.
     *
     * @param filePath path of the output file (opened in append mode)
     */
    public void writeToFile(String filePath) {
        // try-with-resources guarantees the writer is closed on every exit path.
        try (OutputStreamWriter osw = new OutputStreamWriter(
                new FileOutputStream(filePath, true), "UTF-8")) {
            while (true) {
                // Emit one record every 0.5 s.
                Thread.sleep(500);
                String data = product();
                System.out.println(data);
                osw.write(data + "\n");
                osw.flush();
            }
        } catch (IOException e) {
            // Covers UnsupportedEncodingException and FileNotFoundException too.
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        // Guard against a missing output-path argument instead of crashing with
        // an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: java com.mobile.Producer <outputFilePath>");
            System.exit(1);
        }
        Producer producer = new Producer();
        // Initialize the phone-number test data before producing records.
        producer.initPhone();
        producer.writeToFile(args[0]);
    }
}
2.flume+kafka消费数据【flume与kafka的搭建在此省略】
2.1 kafka消费数据代码
maven依赖
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.11.0.2</version>
</dependency>
消费代码 【2.4】
package com.bigdata.kafka;
import com.bigdata.hbase.HBaseDAO;
import com.bigdata.utils.PropertiesUtils;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.util.Arrays;
import java.util.Properties;
/**
* @author kaiya
* @Desc kafka消费数据写入Hbase
* @date 2020/5/10 12:22
*/
public class Kafka2HBase {
    public static void main(String[] args) {
        // Build a consumer from the externally loaded configuration
        // (bootstrap servers, group id, deserializers, ...).
        KafkaConsumer<String, String> consumer =
                new KafkaConsumer<>(PropertiesUtils.properties);
        // Subscribe to the topic named by the "kafka.topics" property.
        consumer.subscribe(Arrays.asList(PropertiesUtils.getProperty("kafka.topics")));
        // Poll forever, echoing every record value to stdout.
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                // Example record value: 14575535933,15422018558,2020-01-25 22:41:22,1551
                String value = record.value();
                System.out.println(value);
            }
        }
    }
}
配置文件 properties
# 设置kafka的brokerlist
bootstrap.servers=bigdata111:9092,bigdata112:9092,bigdata113:9092
# 设置消费者所属的消费组
group.id=hbase_consumer_group
# 设置是否自动确认offset
enable.auto.commit=true
# 自动确认offset的时间间隔
auto.commit.interval.ms=30000
# 设置key,value的反序列化类的全名
key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
value.deserializer=org.apache.kafka.common.serialization.StringDeserializer
# 以下为自定义属性设置
# 设置本次消费的主题
kafka.topics=calllog
读取配置文件
package com.bigdata.utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/**
* @author kaiya
* @Desc 读取配置文件的工具类
* @date 2020/5/8 20:51
*/
public class PropertiesUtils {
    // Shared configuration; initialized exactly once in the static block below.
    // Always non-null after class initialization (may be empty if loading failed).
    public static Properties properties = null;
    private static final Logger LOG = LoggerFactory.getLogger(PropertiesUtils.class);
    static {
        properties = new Properties();
        // Load hbase.properties from the classpath root. try-with-resources
        // closes the stream, and the null check avoids an NPE from
        // Properties.load(null) when the resource is missing.
        try (InputStream is = ClassLoader.getSystemResourceAsStream("hbase.properties")) {
            if (is == null) {
                LOG.error("hbase.properties not found on the classpath");
            } else {
                properties.load(is);
            }
        } catch (IOException e) {
            LOG.error("Failed to load hbase.properties", e);
        }
    }
    /**
     * Returns the value mapped to {@code key} in hbase.properties.
     *
     * @param key property name
     * @return the value, or null when the key is absent (or the file failed to load)
     */
    public static String getProperty(String key) {
        return properties.getProperty(key);
    }
}
2.2 生产数据
将数据生产的代码用maven打包成jar包,上传到Linux环境,运行jar包生产数据
java -cp Producer-1.0-SNAPSHOT.jar com.mobile.Producer /opt/datas/calllog.csv
2.3 flume拉取数据到kafka
配置文件
# define
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# source
a1.sources.r1.type = exec
#监控的文件,即数据生产所写入的文件
a1.sources.r1.command = tail -F -c +0 /opt/datas/calllog.csv
a1.sources.r1.shell = /bin/bash -c
# sink
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.brokerList = bigdata111:9092,bigdata112:9092,bigdata113:9092
a1.sinks.k1.topic = calllog
a1.sinks.k1.batchSize = 20
a1.sinks.k1.requiredAcks = 1
# channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# bind
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
启动flume
为了方便,直接保存成脚本
#!/bin/bash
/opt/module/flume-1.8.0/bin/flume-ng agent -n a1 -c /opt/module/flume-1.8.0/conf/ -f /opt/module/flume-1.8.0/jobconf/flume-to_kafka.conf
2.4运行kafka消费数据代码,由于我把消费到的数据打印到控制台,可以直接看到数据