需求

找出每个月温度最高的两天

数据集
1949-10-01 14:21:02	34c
1949-10-01 19:21:02	38c
1949-10-02 14:01:02	36c
1950-01-01 11:21:02	32c
1950-10-01 12:21:02	37c
1951-12-01 12:21:02	23c
1950-10-02 12:21:02	41c
1950-10-03 12:21:02	27c
1951-07-01 12:21:02	45c
1951-07-02 12:21:02	46c
1951-07-03 12:21:03	47c
案例分析

在MR中,原语是“相同”key的键值对为一组,调用一次reduce方法,方法内迭代这组数据计算。

找出每个月气温最高的两天
二次排序,分组比较器和排序比较器不一样
map:key LongWritable
value Text
日期+气温 将value中的指标拿出来组成一个新的key
分组的时候,需要将新的key拆开来比较

需要重写排序比较器/分组比较器

步骤
  • 自定义数据类型Weather
    包含时间
    包含温度
    自定义排序比较规则
  • 自定义分组比较
    年月相同被视为相同的key
    那么reduce迭代时,相同年月的记录有可能是同一天的,reduce中需要判断是否同一天
  • 数据量很大
    全量数据可以切分处理,最小的切分单位是一个月的数据(同一个月的记录必须交给同一个reduce判断)
    这种业务场景可以设置多个reduce
    通过实现partition
具体实现

MapReduce案例求每年最高气温 课题分析 mapreduce分析天气数据_ide


Weather

/**
 * Composite MapReduce key holding the date parts and the temperature of one
 * weather record.
 *
 * <p>Natural ordering: year ascending, then month ascending, then temperature
 * descending. The day is deliberately not part of the ordering, so within one
 * (year, month) the hottest record sorts first regardless of its day.
 *
 * <p>All fields are boxed and must be set before {@link #write(DataOutput)} or
 * {@link #compareTo(Weather)} is called; a {@code null} field would cause a
 * {@link NullPointerException} on unboxing/comparison.
 */
public class Weather implements WritableComparable<Weather> {
    private Integer year;
    private Integer month;
    private Integer day;
    private Double temperature;

    /**
     * Compares by year (ascending), month (ascending) and temperature
     * (descending).
     *
     * @param o the other key
     * @return negative, zero or positive as this key sorts before, equal to or
     *         after {@code o}
     */
    @Override
    public int compareTo(Weather o) {
        System.out.println("********Weather********compareTo*****************");
        int result=this.year.compareTo(o.getYear());
        if (result==0){
            result=this.month.compareTo(o.getMonth());
            if (result==0){
                // Operands are swapped on purpose: higher temperatures sort first.
                // The result has to be assigned, otherwise records of the same
                // month would always compare as equal.
                result=o.getTemperature().compareTo(this.temperature);
            }
        }
        return result;
    }

    /**
     * Comparator registered for {@code Weather} keys. Per the original author's
     * note it is used when Weather is the key and the job does not specify a
     * comparator through {@code job.setSortComparatorClass(...)} (compare with
     * how Text is used as the key in the word-count example).
     * It delegates to {@link Weather#compareTo(Weather)}:
     * year/month ascending, temperature descending.
     */
    public static class Comparator extends WritableComparator{
        public Comparator(){
            // true: let the parent create key instances for deserialized comparison
            super(Weather.class,true);
        }

        @Override
        public int compare(WritableComparable a,WritableComparable b){
            System.out.println("======Weather=====subClass=====Comparator==========================");
            Weather wa=(Weather) a;
            Weather wb=(Weather) b;
            return wa.compareTo(wb);
        }
    }

    static {
        // register this comparator
        WritableComparator.define(Weather.class,new Comparator());
    }

    /**
     * Serializes the key as: year (int), month (int), day (int),
     * temperature (double). Must stay in sync with {@link #readFields(DataInput)}.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(year);
        dataOutput.writeInt(month);
        dataOutput.writeInt(day);
        dataOutput.writeDouble(temperature);
    }

    /**
     * Deserializes the key, reading the fields in the same order in which
     * {@link #write(DataOutput)} wrote them.
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        setYear(dataInput.readInt());
        setMonth(dataInput.readInt());
        setDay(dataInput.readInt());
        setTemperature(dataInput.readDouble());
    }

    public Integer getYear() {
        return year;
    }

    public void setYear(Integer year) {
        this.year = year;
    }

    public Integer getMonth() {
        return month;
    }

    public void setMonth(Integer month) {
        this.month = month;
    }

    public Integer getDay() {
        return day;
    }

    public void setDay(Integer day) {
        this.day = day;
    }

    public Double getTemperature() {
        return temperature;
    }

    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }
}

WeatherGroupingComparator

MapReduce案例求每年最高气温 课题分析 mapreduce分析天气数据_Text_02


WeatherMain

/**
 * Driver for the "two hottest days of every month" job.
 *
 * <p>Expects exactly two arguments: the input path and the output path. An
 * existing output path is deleted recursively before the job is submitted.
 */
public class WeatherMain {
    /**
     * Configures and runs the job, then exits the JVM with status 0 when the
     * job succeeded and 1 when it failed (or when the arguments are wrong).
     *
     * @param args {@code <input> <output>}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args==null||args.length!=2){
            System.out.println("Usage:yarn jar myweather.jar com.bupt.weather <input> <output>");
            System.exit(1);
        }
        Configuration configuration=new Configuration(true);
        // Run with the local job runner instead of submitting to a cluster.
        configuration.set("mapreduce.framework.name","local");
        Job job= Job.getInstance(configuration);
        job.setJobName("统计每个月温度最高的两天");
        job.setJarByClass(WeatherMain.class);

        // Map side: key is the composite Weather key, value is the raw input
        // line, e.g. "1949-10-01 14:21:02 34c".
        job.setMapperClass(WeatherMapper.class);
        job.setMapOutputKeyClass(Weather.class);
        job.setMapOutputValueClass(Text.class);

        // Reduce side
        job.setReducerClass(WeatherReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Input and output paths; a stale output directory is removed so the
        // job can be re-run.
        FileInputFormat.addInputPath(job,new Path(args[0]));
        Path outputPath=new Path(args[1]);
        if (outputPath.getFileSystem(configuration).exists(outputPath)){
            outputPath.getFileSystem(configuration).delete(outputPath,true);
        }
        FileOutputFormat.setOutputPath(job,outputPath);

        // Partitioning and number of reduce tasks
        job.setNumReduceTasks(2);
        job.setPartitionerClass(WeatherPatitioner.class);
        // Sort comparator: once set, the comparator registered inside the key
        // class is no longer used for sorting.
        job.setSortComparatorClass(WeatherSortComparator.class);
        // Grouping comparator
        job.setGroupingComparatorClass(WeatherGroupingComparator.class);

        // Propagate the job result so that a failed job is visible to the
        // calling shell or scheduler through the process exit status.
        boolean succeeded=job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

WeatherMapper

/**
 * Turns each input line of the form
 * {@code "1949-10-01 14:21:02<TAB>34c"} into a ({@link Weather}, original line)
 * pair. Lines that cannot be parsed are skipped and counted instead of failing
 * the task.
 */
public class WeatherMapper extends Mapper<LongWritable, Text, Weather, Text> {
    // Reused for every record; its fields are overwritten before each write.
    private Weather weather = new Weather();

    // Built once per mapper instead of once per record. Kept as an instance
    // field (not static) because SimpleDateFormat is not thread-safe.
    // NOTE(review): assumes map() is invoked on one instance by a single thread.
    private final SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /**
     * Parses one line and emits it keyed by its date parts and temperature.
     *
     * @param key     input key supplied by the input format (not used)
     * @param value   the raw line: timestamp, a tab, then the temperature with
     *                a one-character unit suffix such as {@code 34c}
     * @param context used to emit the (Weather, line) pair
     * @throws java.io.IOException  if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws java.io.IOException, InterruptedException {
        String line = value.toString().trim();
        String[] datas = line.split("\t");
        if (datas.length < 2 || datas[1].isEmpty()) {
            skipMalformed(context, line, "expected <timestamp>\\t<temperature>");
            return;
        }

        try {
            // Strip the trailing unit character: "34c" -> "34" -> 34.0
            weather.setTemperature(Double.parseDouble(datas[1].substring(0, datas[1].length() - 1)));

            Date date = simpleDateFormat.parse(datas[0]);
            Calendar calendar = Calendar.getInstance();
            calendar.setTime(date);
            weather.setYear(calendar.get(Calendar.YEAR));
            // Calendar.MONTH is zero-based, so add 1 to get the calendar month.
            weather.setMonth(calendar.get(Calendar.MONTH) + 1);
            weather.setDay(calendar.get(Calendar.DAY_OF_MONTH));
        } catch (java.text.ParseException | NumberFormatException e) {
            // Only parsing problems are treated as "bad record"; failures of
            // context.write below must reach the framework.
            skipMalformed(context, line, e.getMessage());
            return;
        }

        context.write(weather, value);
    }

    /** Reports a line that could not be parsed and bumps a job counter for it. */
    private void skipMalformed(Context context, String line, String reason) {
        System.err.println("Skipping malformed weather record [" + line + "]: " + reason);
        context.getCounter("Weather", "MALFORMED_RECORDS").increment(1);
    }
}

WeatherPatitioner

MapReduce案例求每年最高气温 课题分析 mapreduce分析天气数据_ide_03


WeatherReducer

/**
 * Emits at most two lines per reduce group: the first value of the group and
 * the first later value whose key reports a different day.
 *
 * <p>With year/month ascending and temperature descending as the sort order and
 * year+month as the grouping, these are the two hottest distinct days of the
 * month, e.g. "1949-10-01 38" followed by "1949-10-02 36".
 */
public class WeatherReducer extends Reducer<Weather, Text,Text, NullWritable> {
    @Override
    protected  void reduce(Weather key,Iterable<Text> values,Context context) throws IOException, InterruptedException {
        // Sentinel meaning "no record of this group has been emitted yet".
        final int notSeen=-1;
        int hottestDay=notSeen;

        for (Text record:values) {
            if (hottestDay==notSeen){
                // First record of the group: the month's highest temperature.
                context.write(record,NullWritable.get());
                hottestDay=key.getDay();
                continue;
            }
            // key.getDay() is re-read on every iteration; this presumes the
            // framework refreshes `key` to match the current value.
            if (hottestDay==key.getDay()){
                // Another reading from the already emitted day; keep looking.
                continue;
            }
            // First record from a different day: second hottest day, then done.
            context.write(record,NullWritable.get());
            return;
        }
    }
}

WeatherSortComparator

MapReduce案例求每年最高气温 课题分析 mapreduce分析天气数据_比较器_04


配置传参

MapReduce案例求每年最高气温 课题分析 mapreduce分析天气数据_Text_05

测试结果

MapReduce案例求每年最高气温 课题分析 mapreduce分析天气数据_Text_06


MapReduce案例求每年最高气温 课题分析 mapreduce分析天气数据_比较器_07


MapReduce案例求每年最高气温 课题分析 mapreduce分析天气数据_ide_08