Parameter settings: reference material

A good reference:

http://developer.51cto.com/art/201501/464491.htm

Note: the configuration file server.properties specifies the number of partitions via num.partitions. This is the total partition count for a single topic (it is the default used when a topic is created automatically). If there are multiple brokers, the partitions may be distributed across different nodes; in that case the partitions of the topic on all brokers together add up to num.partitions.
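For reference, the relevant fragment of server.properties might look like the following (the values are only illustrative; a partition count given explicitly when creating a topic overrides this default):

# server.properties (illustrative values)
broker.id=0
# default number of partitions per topic
num.partitions=4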

In 0.7, some of the producer configuration options are mutually exclusive: once you set one, you cannot set the other.
For example:
  broker.list and zk.connect cannot both be set
  broker.list and partitioner.class cannot both be set
If you set them anyway, compilation is unaffected, but an exception is thrown at runtime, as sketched below.
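A minimal sketch of what that looks like (0.7-era configuration; the class name here is made up and the exact exception type depends on the build):

import java.util.Properties;

import kafka.producer.ProducerConfig;

// Hypothetical demo: broker.list and zk.connect set together (0.7-style config)
public class ConflictingProducerConfigDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("broker.list", "0:10.10.10.10:9092"); // static broker list
        props.put("zk.connect", "10.10.10.10:2181");    // ZooKeeper discovery; conflicts with broker.list
        // Compiles fine, but the configuration check fails here at runtime
        ProducerConfig config = new ProducerConfig(props);
    }
}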

1. Specifying the broker

props.put("broker.list", "0:10.10.10.10:9092");//直接连接kafka
设置这项后,就不能设置partitioner.class了,可是我在运行的时候发现,此时所有的数据都发往10.10.10.10的4个分区,并没有只发给一个分区。我换了syncproducer里的send(topic,partitionid,list)都没用。

2. Specifying the partition
props.put("partitioner.class","com.kafka.myparitioner.CidPartitioner");
props.put("zk.connect", "10.10.10.10:2181"); // connect to ZooKeeper

The com.kafka.myparitioner.CidPartitioner above is a class you implement yourself; note that the fully qualified class name must be given.
CidPartitioner implements the Partitioner interface; the partition method it implements defines how the partition is computed from the key.



package com.kafka.myparitioner;

import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

//Rule that decides, based on the key, which partition the current message is sent to
public class CidPartitioner implements Partitioner {
    public CidPartitioner(VerifiableProperties props) {  
          //Note: the constructor body is empty, but the constructor itself is required;
          //Kafka instantiates the partitioner reflectively and passes in the producer's VerifiableProperties
    }  
    
    @Override
    public int partition(Object key, int numPartitions) {
        try {            
            long partitionNum = Long.parseLong((String) key);
            return (int) Math.abs(partitionNum % numPartitions);
        } catch (Exception e) {
            return Math.abs(key.hashCode() % numPartitions);
        }
    }
}
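For example, with numPartitions = 4 a message whose key is the string "10001" goes to partition 10001 % 4 = 1, while a key that cannot be parsed as a number falls back to the hashCode-based branch.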



 

To have the partition assigned based on the key, you must specify a key when sending the message.
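With the 0.8 producer API this just means using the KeyedMessage constructor that takes a key; a minimal sketch, assuming a producer configured as in the class below (topic name and key are placeholders):

// With a key: the key is handed to partitioner.class, which picks the partition
producer.send(new KeyedMessage<String, String>("my_topic", "10001", "message body"));
// Without a key: the partitioner is not consulted and the producer picks a partition itself
producer.send(new KeyedMessage<String, String>("my_topic", "message body"));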



import java.io.IOException;
import java.io.InputStream;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.Properties;
import java.util.regex.Pattern;

import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

//Differs from KafkaReceiverLTELogSocket in that it specifies the partition assignment rule for the messages
public class KafkaReceiveLTELogSocketPartition extends Thread{
    //Sends the LTE signaling data at a fixed interval
    String regEx ="[^0-9.\\+\\-\\s+\\,E]"; 
    Pattern p = Pattern.compile(regEx); 
        
    //The first type parameter is the key type, the second is the message type
    private final kafka.javaapi.producer.Producer<String, String> producer;
    private final String topic;
    private final Properties props = new Properties();
    
    private final int port = 12345; 
    
    public KafkaReceiveLTELogSocketPartition(String topic) {
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("metadata.broker.list", "192.168.1.164:9093"); // 配置kafka端口        
        props.put("partitioner.class","com.kafka.myparitioner.CidPartitioner");
        //props.put("zk.connect", "192.168.1.164:2181");//连接zk,新的版本好像不需要
        
        producer = new kafka.javaapi.producer.Producer<String, String>(new ProducerConfig(props));
        this.topic = topic;
    }
    
    public void receiveAndWrite2(String outputFileName , int port) throws IOException{
        ServerSocket serverSocket = new ServerSocket(port);
        Socket socket = serverSocket.accept();
        StringBuilder sb = new StringBuilder();
        try{
            while(true){                
                InputStream istream = socket.getInputStream();
                int count = 0;
                while (count == 0) {
                    count = istream.available();
                }
                byte[] b = new byte[count];
                istream.read(b);
                for(int i = 0 ; i < count ; i ++){
                    if(b[i]=='\n'){ // A newline in the stream means one complete message has been received; send it
                        String str = sb.toString();
                        
                        //Extract key_cid_str (the field between ":" and "," in the line)
                        String key_cid_str = str.substring(str.indexOf(":")+1, str.indexOf(","));
                        
                        System.out.println("接收长度:"+str.length());
                        System.out.println(str);
                        //The first type parameter is the key type, the second is the message type
                        producer.send(new KeyedMessage<String, String>(topic,key_cid_str,str));
                        
                        sb = new StringBuilder();
                    }else{        
                        sb.append(Character.toChars(b[i]));
                    }
                }
            }
            
        }finally{
            // Close the sockets here, not inside the while loop; otherwise the sender would have to re-establish the connection each time
            socket.close();
            serverSocket.close();
        }
    }
    
    @Override
    public void run() {
        String filename = "JSON1_Yanming_DriveTesting_09-04.16-17.16-27_TIME.json";
        String outputFileName  = ""+filename;
        
        try {
            receiveAndWrite2(outputFileName,port);
        } catch (IOException e) {    
            e.printStackTrace();
        }        
    }    
    public static void main(String[] args) {
        String topic = "kafka_flume_topic";
        new KafkaReceiveLTELogSocketPartition(topic).start();
    }
}



 

 

Output with KafkaConsumer (the high-level consumer is used here)



import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;


public class KafkaConsumer extends Thread {
    private final ConsumerConnector consumer;
    private final String topic;

    public KafkaConsumer(String topic) {
        consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
        this.topic = topic;
    }

    private static ConsumerConfig createConsumerConfig() {
        Properties props = new Properties();
        props.put("zookeeper.connect", "192.168.1.164:2181"); // zookeeper的地址
        props.put("group.id", "group2"); // 组ID

        //ZooKeeper session timeout, sync time and auto-commit interval
        props.put("zookeeper.session.timeout.ms", "40000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        
        return new ConsumerConfig(props);
    }

    @Override
    public void run() {
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        //Number of streams (threads) to open for each topic
        topicCountMap.put(topic, new Integer(1));
        
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap     = consumer.createMessageStreams(topicCountMap);
        
        KafkaStream<byte[], byte[]> stream = consumerMap.get(topic).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();
        while (it.hasNext()) {
            MessageAndMetadata<byte[], byte[]> message = it.next();  
            String topic = message.topic();  
            int partition = message.partition();  
            long offset = message.offset();  
            String key = new String(message.key());  
            String msg = new String(message.message());  
            // Process the message here; this example simply prints it
            // If consumption fails, the information above can be written to a log, or sent out as an alert SMS or email for later handling
            System.out.println( " thread : " + Thread.currentThread().getName()  
                    + ", topic : " + topic + ", partition : " + partition + ", offset : " + offset + " , key : "  
                    + key + " , mess : " + msg);              
        }
    }
}
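A minimal way to run this consumer, assuming the same topic name as the producer example above:

// Start the high-level consumer; the thread blocks in it.hasNext() waiting for messages
new KafkaConsumer("kafka_flume_topic").start();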



 

Appendix: Kafka low-level consumer (SimpleConsumer)



package com.cuicui.kafkademon;  
  
  
import java.nio.ByteBuffer;  
import java.util.Collections;  
import java.util.HashMap;  
import java.util.List;  
import java.util.Map;  
  
  
import kafka.api.FetchRequest;  
import kafka.api.FetchRequestBuilder;  
import kafka.api.PartitionOffsetRequestInfo;  
import kafka.cluster.Broker;  
import kafka.common.TopicAndPartition;  
import kafka.javaapi.FetchResponse;  
import kafka.javaapi.OffsetRequest;  
import kafka.javaapi.OffsetResponse;  
import kafka.javaapi.PartitionMetadata;  
import kafka.javaapi.TopicMetadata;  
import kafka.javaapi.TopicMetadataRequest;  
import kafka.javaapi.TopicMetadataResponse;  
import kafka.javaapi.consumer.SimpleConsumer;  
import kafka.javaapi.message.ByteBufferMessageSet;  
import kafka.message.Message;  
import kafka.message.MessageAndOffset;  
  
  
/** 
 * Offsets are maintained by the caller; the target topic and partition are chosen explicitly. 
 *  
 * @author <a href="mailto:leicui001@126.com">崔磊</a> 
 * @date 2015-11-04 11:44:15 
 * 
 */  
public class MySimpleConsumer {  
  
  
    public static void main(String[] args) {  
        new MySimpleConsumer().consume();  
    }  
  
  
    /** 
     * Consume messages. 
     */  
    public void consume() {  
        int partition = 0;  
  
  
        // Find the leader broker for the target partition
        // (KafkaProperties is a constants class defined elsewhere, holding BROKER_CONNECT and TOPIC)
        Broker leaderBroker = findLeader(KafkaProperties.BROKER_CONNECT, KafkaProperties.TOPIC, partition);  
  
  
        // Consume from the leader
        SimpleConsumer simpleConsumer =  
                new SimpleConsumer(leaderBroker.host(), leaderBroker.port(), 20000, 10000, "mySimpleConsumer");  
        long startOffset = 1;
        int fetchSize = 1000;

        while (true) {
            long offset = startOffset;
            // addFetch specifies the target topic, partition, starting offset and fetchSize (in bytes); multiple fetches can be added
            FetchRequest req =
                    new FetchRequestBuilder().addFetch(KafkaProperties.TOPIC, partition, startOffset, fetchSize).build();

            // Pull the messages
            FetchResponse fetchResponse = simpleConsumer.fetch(req);

            ByteBufferMessageSet messageSet = fetchResponse.messageSet(KafkaProperties.TOPIC, partition);
            for (MessageAndOffset messageAndOffset : messageSet) {
                Message mess = messageAndOffset.message();
                ByteBuffer payload = mess.payload();
                byte[] bytes = new byte[payload.limit()];
                payload.get(bytes);
                String msg = new String(bytes);

                offset = messageAndOffset.offset();
                System.out.println("partition : " + partition + ", offset : " + offset + "  mess : " + msg);
            }
            // Continue with the next batch
            startOffset = offset + 1;
        }
    }  
  
  
    /** 
     * Find the leader broker of the specified partition. 
     *  
     * @param brokerHosts broker addresses in the form "host1:port1,host2:port2,host3:port3" 
     * @param topic topic 
     * @param partition partition 
     * @return the leader broker 
     */  
    public Broker findLeader(String brokerHosts, String topic, int partition) {  
        Broker leader = findPartitionMetadata(brokerHosts, topic, partition).leader();  
        System.out.println(String.format("Leader tor topic %s, partition %d is %s:%d", topic, partition, leader.host(),  
                leader.port()));  
        return leader;  
    }  
  
  
    /** 
     * Find the metadata of the specified partition. 
     *  
     * @param brokerHosts broker addresses in the form "host1:port1,host2:port2,host3:port3" 
     * @param topic topic 
     * @param partition partition 
     * @return the partition metadata 
     */  
    private PartitionMetadata findPartitionMetadata(String brokerHosts, String topic, int partition) {  
        PartitionMetadata returnMetaData = null;  
        for (String brokerHost : brokerHosts.split(",")) {  
            SimpleConsumer consumer = null;  
            String[] splits = brokerHost.split(":");  
            consumer = new SimpleConsumer(splits[0], Integer.valueOf(splits[1]), 100000, 64 * 1024, "leaderLookup");  
            List<String> topics = Collections.singletonList(topic);  
            TopicMetadataRequest request = new TopicMetadataRequest(topics);  
            TopicMetadataResponse response = consumer.send(request);  
            List<TopicMetadata> topicMetadatas = response.topicsMetadata();  
            for (TopicMetadata topicMetadata : topicMetadatas) {  
                for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {  
                    if (partitionMetadata.partitionId() == partition) {  
                        returnMetaData = partitionMetadata;  
                    }  
                }  
            }  
            if (consumer != null)  
                consumer.close();  
        }  
        return returnMetaData;  
    }  
  
  
    /** 
     * Find the offset for a client based on a timestamp. 
     *  
     * @param consumer SimpleConsumer 
     * @param topic topic 
     * @param partition partition 
     * @param clientID client ID 
     * @param whichTime timestamp, e.g. kafka.api.OffsetRequest.EarliestTime() or LatestTime() 
     * @return offset 
     */  
    public long getLastOffset(SimpleConsumer consumer, String topic, int partition, String clientID, long whichTime) {  
        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);  
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =  
                new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();  
        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));  
        OffsetRequest request = new OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientID);  
        OffsetResponse response = consumer.getOffsetsBefore(request);  
        long[] offsets = response.offsets(topic, partition);  
        return offsets[0];  
    }  
}
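Note that getLastOffset is not called in consume() above; in practice it would typically be used to initialize the starting offset instead of hard-coding 1. A sketch, reusing the simpleConsumer, topic and partition from consume():

// Earliest offset still available on the broker for this partition
long earliest = getLastOffset(simpleConsumer, KafkaProperties.TOPIC, partition,
        "mySimpleConsumer", kafka.api.OffsetRequest.EarliestTime());
// Latest offset, i.e. start consuming only newly arriving messages
long latest = getLastOffset(simpleConsumer, KafkaProperties.TOPIC, partition,
        "mySimpleConsumer", kafka.api.OffsetRequest.LatestTime());
long startOffset = earliest;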

