Parameter settings: references
A useful reference:
http://developer.51cto.com/art/201501/464491.htm
Note: the num.partitions setting in server.properties specifies the number of partitions for a topic. With multiple brokers, a topic's partitions may be spread across different nodes, so the partition counts for that topic on all brokers add up to num.partitions.
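For illustration, a minimal server.properties excerpt showing where this setting lives (the values here are only examples, not taken from the original setup):
# excerpt from server.properties (example values)
broker.id=0
port=9092
log.dirs=/tmp/kafka-logs
num.partitions=4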
In Kafka 0.7, some producer settings are mutually exclusive: if you set one, you cannot set the other. For example:
broker.list and zk.connect cannot both be set
broker.list and partitioner.class cannot both be set
Doing so compiles fine, but throws an exception at runtime.
1. Specifying brokers
props.put("broker.list", "0:10.10.10.10:9092"); // connect to Kafka directly
With this set, partitioner.class can no longer be set. However, when I ran it, all data was sent to the 4 partitions on 10.10.10.10, not to a single partition; switching to SyncProducer's send(topic, partitionId, list) made no difference either.
2. Specifying a partitioner
props.put("partitioner.class", "com.kafka.myparitioner.CidPartitioner");
props.put("zk.connect", "10.10.10.10:2181"); // connect to ZooKeeper
com.kafka.myparitioner.CidPartitioner above is a class you implement yourself; note that its fully qualified name, including the package, must be used.
CidPartitioner implements the Partitioner interface; its partition method defines how the target partition is computed from the key.
package com.kafka.myparitioner;

import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

// Decides, based on the key, which partition the current message is sent to
public class CidPartitioner implements Partitioner {

    public CidPartitioner(VerifiableProperties props) {
        // Note: the constructor body is empty, but the constructor itself is required
    }

    @Override
    public int partition(Object key, int numPartitions) {
        try {
            // If the key is a numeric string, partition by its value
            long partitionNum = Long.parseLong((String) key);
            return (int) Math.abs(partitionNum % numPartitions);
        } catch (Exception e) {
            // Otherwise fall back to hashing the key
            return Math.abs(key.hashCode() % numPartitions);
        }
    }
}
To have partitions assigned based on the key, you must specify a key when sending the message.
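Before the full receiver class below, here is a minimal sketch of sending a keyed message with the custom partitioner. The class name KeyedSendSketch, the broker address, topic, and key value are placeholders for illustration, not part of the original setup:
import java.util.Properties;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class KeyedSendSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("metadata.broker.list", "192.168.1.164:9093"); // placeholder broker address
        props.put("partitioner.class", "com.kafka.myparitioner.CidPartitioner");
        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));
        // The key ("12345" here) is what CidPartitioner uses to compute the target partition
        producer.send(new KeyedMessage<String, String>("kafka_flume_topic", "12345", "hello"));
        producer.close();
    }
}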
import java.io.IOException;
import java.io.InputStream;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.Properties;
import java.util.regex.Pattern;

import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

// Differs from KafkaReceiverLTELogSocket in that it specifies the partition assignment rule for messages
public class KafkaReceiveLTELogSocketPartition extends Thread {
    // Sends LTE signaling data at a certain time interval
    String regEx = "[^0-9.\\+\\-\\s+\\,E]";
    Pattern p = Pattern.compile(regEx);

    // The first type parameter is the key type, the second is the message type
    private final kafka.javaapi.producer.Producer<String, String> producer;
    private final String topic;
    private final Properties props = new Properties();
    private final int port = 12345;

    public KafkaReceiveLTELogSocketPartition(String topic) {
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("metadata.broker.list", "192.168.1.164:9093"); // Kafka broker address
        props.put("partitioner.class", "com.kafka.myparitioner.CidPartitioner");
        // props.put("zk.connect", "192.168.1.164:2181"); // connect to ZooKeeper; apparently not needed in newer versions
        producer = new kafka.javaapi.producer.Producer<String, String>(new ProducerConfig(props));
        this.topic = topic;
    }

    public void receiveAndWrite2(String outputFileName, int port) throws IOException {
        ServerSocket serverSocket = new ServerSocket(port);
        Socket socket = serverSocket.accept();
        StringBuilder sb = new StringBuilder();
        try {
            while (true) {
                InputStream istream = socket.getInputStream();
                int count = 0;
                while (count == 0) {
                    count = istream.available();
                }
                byte[] b = new byte[count];
                istream.read(b);
                for (int i = 0; i < count; i++) {
                    if (b[i] == '\n') { // a newline in the stream marks a complete message; send it
                        String str = sb.toString();
                        // extract key_cid_str to use as the message key
                        String key_cid_str = str.substring(str.indexOf(":") + 1, str.indexOf(","));
                        System.out.println("Received length: " + str.length());
                        System.out.println(str);
                        // The first type parameter is the key type, the second is the message type
                        producer.send(new KeyedMessage<String, String>(topic, key_cid_str, str));
                        sb = new StringBuilder();
                    } else {
                        sb.append(Character.toChars(b[i]));
                    }
                }
            }
        } finally {
            // Close the sockets here, not inside the while loop; otherwise the sender would have to reconnect every time
            socket.close();
            serverSocket.close();
        }
    }

    @Override
    public void run() {
        String filename = "JSON1_Yanming_DriveTesting_09-04.16-17.16-27_TIME.json";
        String outputFileName = "" + filename;
        try {
            receiveAndWrite2(outputFileName, port);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        String topic = "kafka_flume_topic";
        new KafkaReceiveLTELogSocketPartition(topic).start();
    }
}
Output with a KafkaConsumer (using the high-level consumer here)
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

public class KafkaConsumer extends Thread {
    private final ConsumerConnector consumer;
    private final String topic;

    public KafkaConsumer(String topic) {
        consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
        this.topic = topic;
    }

    private static ConsumerConfig createConsumerConfig() {
        Properties props = new Properties();
        props.put("zookeeper.connect", "192.168.1.164:2181"); // ZooKeeper address
        props.put("group.id", "group2"); // consumer group ID
        // ZooKeeper session timeout
        props.put("zookeeper.session.timeout.ms", "40000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        return new ConsumerConfig(props);
    }

    @Override
    public void run() {
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        // how many threads (streams) to open per topic
        topicCountMap.put(topic, new Integer(1));
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
        KafkaStream<byte[], byte[]> stream = consumerMap.get(topic).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();
        while (it.hasNext()) {
            MessageAndMetadata<byte[], byte[]> message = it.next();
            String topic = message.topic();
            int partition = message.partition();
            long offset = message.offset();
            String key = new String(message.key());
            String msg = new String(message.message());
            // Process the message here; this example simply prints it.
            // If consumption fails, the information above can be written to a log,
            // or sent out via alert SMS/email for later handling.
            System.out.println(" thread : " + Thread.currentThread().getName()
                    + ", topic : " + topic + ", partition : " + partition + ", offset : " + offset + " , key : "
                    + key + " , mess : " + msg);
        }
    }
}
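The consumer class above has no main method. A minimal sketch to start it against the topic used in the producer example; the launcher class name KafkaConsumerMain is hypothetical:
// Hypothetical launcher class; KafkaConsumer and the topic name come from the code above
public class KafkaConsumerMain {
    public static void main(String[] args) {
        new KafkaConsumer("kafka_flume_topic").start();
    }
}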
Appendix: a Kafka low-level (simple) consumer
package com.cuicui.kafkademon;

import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.cluster.Broker;
import kafka.common.TopicAndPartition;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.OffsetRequest;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.TopicMetadataResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.Message;
import kafka.message.MessageAndOffset;

/**
 * Offsets are maintained by the client itself; the target topic and partition are also chosen by the client.
 *
 * @author <a href="mailto:leicui001@126.com">崔磊</a>
 * @date 2015-11-04 11:44:15
 */
public class MySimpleConsumer {

    public static void main(String[] args) {
        new MySimpleConsumer().consume();
    }

    /**
     * Consume messages.
     */
    public void consume() {
        int partition = 0;

        // find the leader for the partition
        Broker leaderBroker = findLeader(KafkaProperties.BROKER_CONNECT, KafkaProperties.TOPIC, partition);

        // consume from the leader
        SimpleConsumer simpleConsumer =
                new SimpleConsumer(leaderBroker.host(), leaderBroker.port(), 20000, 10000, "mySimpleConsumer");
        long startOffset = 1;
        int fetchSize = 1000;

        while (true) {
            long offset = startOffset;
            // addFetch specifies the target topic, partition, starting offset and fetchSize (in bytes);
            // multiple fetches can be added to one request
            FetchRequest req =
                    new FetchRequestBuilder().addFetch(KafkaProperties.TOPIC, partition, startOffset, fetchSize).build();

            // fetch the messages
            FetchResponse fetchResponse = simpleConsumer.fetch(req);
            ByteBufferMessageSet messageSet = fetchResponse.messageSet(KafkaProperties.TOPIC, partition);
            for (MessageAndOffset messageAndOffset : messageSet) {
                Message mess = messageAndOffset.message();
                ByteBuffer payload = mess.payload();
                byte[] bytes = new byte[payload.limit()];
                payload.get(bytes);
                String msg = new String(bytes);

                offset = messageAndOffset.offset();
                System.out.println("partition : " + partition + ", offset : " + offset + " mess : " + msg);
            }
            // continue with the next batch
            startOffset = offset + 1;
        }
    }

    /**
     * Find the leader broker of the specified partition.
     *
     * @param brokerHosts broker addresses in the form "host1:port1,host2:port2,host3:port3"
     * @param topic topic
     * @param partition partition
     * @return the leader broker
     */
    public Broker findLeader(String brokerHosts, String topic, int partition) {
        Broker leader = findPartitionMetadata(brokerHosts, topic, partition).leader();
        System.out.println(String.format("Leader for topic %s, partition %d is %s:%d", topic, partition, leader.host(),
                leader.port()));
        return leader;
    }

    /**
     * Find the metadata of the specified partition.
     *
     * @param brokerHosts broker addresses in the form "host1:port1,host2:port2,host3:port3"
     * @param topic topic
     * @param partition partition
     * @return partition metadata
     */
    private PartitionMetadata findPartitionMetadata(String brokerHosts, String topic, int partition) {
        PartitionMetadata returnMetaData = null;
        for (String brokerHost : brokerHosts.split(",")) {
            SimpleConsumer consumer = null;
            String[] splits = brokerHost.split(":");
            consumer = new SimpleConsumer(splits[0], Integer.valueOf(splits[1]), 100000, 64 * 1024, "leaderLookup");
            List<String> topics = Collections.singletonList(topic);
            TopicMetadataRequest request = new TopicMetadataRequest(topics);
            TopicMetadataResponse response = consumer.send(request);
            List<TopicMetadata> topicMetadatas = response.topicsMetadata();
            for (TopicMetadata topicMetadata : topicMetadatas) {
                for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
                    if (partitionMetadata.partitionId() == partition) {
                        returnMetaData = partitionMetadata;
                    }
                }
            }
            if (consumer != null)
                consumer.close();
        }
        return returnMetaData;
    }

    /**
     * Find the offset for a given client at a given timestamp.
     *
     * @param consumer SimpleConsumer
     * @param topic topic
     * @param partition partition
     * @param clientID client ID
     * @param whichTime timestamp
     * @return offset
     */
    public long getLastOffset(SimpleConsumer consumer, String topic, int partition, String clientID, long whichTime) {
        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =
                new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
        OffsetRequest request = new OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientID);
        OffsetResponse response = consumer.getOffsetsBefore(request);
        long[] offsets = response.offsets(topic, partition);
        return offsets[0];
    }
}
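KafkaProperties is referenced above but not shown in the original post; a minimal sketch of what it might look like, where the values are only assumptions carried over from the earlier producer and consumer examples:
// Hypothetical constants class; BROKER_CONNECT and TOPIC are referenced by MySimpleConsumer above,
// and the values below are example values taken from the earlier snippets
public class KafkaProperties {
    public static final String BROKER_CONNECT = "192.168.1.164:9093";
    public static final String TOPIC = "kafka_flume_topic";
}
Note also that getLastOffset is defined but never called in consume(); it could replace the hard-coded startOffset of 1, for example by passing kafka.api.OffsetRequest.EarliestTime() or LatestTime() as the timestamp.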