package topk;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.Comparator;
import java.util.function.Consumer;
import java.util.stream.StreamSupport;
public class SubscriptionVolumeTop10 {

    // Mapper: parses tab-separated lines of the form <name>\t<count>
    // and emits each as a (Text, LongWritable) pair.
    public static class SubscriptionVolumeTop10Mapper extends
            Mapper<LongWritable, Text, Text, LongWritable> {

        private final Text outKey = new Text();
        private final LongWritable outValue = new LongWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Declaring the checked exceptions lets a failed write fail the
            // task instead of being silently swallowed.
            String[] line = value.toString().split("\t");
            outKey.set(line[0]);
            outValue.set(Long.parseLong(line[1]));
            context.write(outKey, outValue);
        }
    }
    // Reducer: reduce() runs once per key, so this emits the ten largest
    // values for each key.
    public static class SubscriptionVolumeTop10Reducer extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) {
            StreamSupport.stream(values.spliterator(), false)
                    .map(LongWritable::get)            // unbox before sorting: Hadoop reuses the writable
                    .sorted(Comparator.reverseOrder()) // largest first
                    .limit(10)                         // top 10, matching the class name
                    .map(LongWritable::new)
                    .forEach(writeResultToContext(key, context));
        }

        // context.write throws checked exceptions, which a lambda handed to
        // forEach cannot declare, so the call is wrapped in a Consumer.
        Consumer<LongWritable> writeResultToContext(Text key, Context context) {
            return subscriptionVolume -> {
                try {
                    context.write(key, subscriptionVolume);
                } catch (IOException | InterruptedException e) {
                    e.printStackTrace();
                }
            };
        }
    }
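
    // Driver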
    public static void main(String[] args) throws Exception {
        // Running from Windows: point Hadoop at a local installation.
        System.setProperty("hadoop.home.dir", "C:\\hadoop-2.6.0");

        Configuration config = new Configuration();
        config.set("fs.defaultFS", "hdfs://192.168.1.10:8020");

        Job job = Job.getInstance(config, "SubscriptionVolumeTop10");
        job.setJarByClass(SubscriptionVolumeTop10.class);
        job.setMapperClass(SubscriptionVolumeTop10Mapper.class);
        job.setReducerClass(SubscriptionVolumeTop10Reducer.class);
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/Top10/input"));
        FileOutputFormat.setOutputPath(job, new Path("/Top10/output2"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
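For reference, the mapper expects tab-separated input lines of the form name<TAB>count, and since the job runs a single reducer, all results land in one output file. The stream pipeline in the reducer is the heart of the top-N selection; the standalone sketch below shows the same sort-then-limit idea with plain longs instead of Hadoop writables (class name and sample values are hypothetical):

import java.util.Comparator;
import java.util.List;

// Standalone illustration of the reducer's top-N pipeline.
// Class name and sample values are hypothetical.
public class TopNSketch {
    public static void main(String[] args) {
        List<Long> counts = List.of(3L, 42L, 17L, 8L, 99L, 56L);
        counts.stream()
                .sorted(Comparator.reverseOrder()) // largest first
                .limit(10)                         // keep at most ten
                .forEach(System.out::println);     // prints 99, 56, 42, 17, 8, 3
    }
}

The project's build.gradle follows.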
plugins {
    id 'java'
}

group 'peerslee'
version '1.0-SNAPSHOT'
sourceCompatibility = 1.10

repositories {
    maven {
        url 'http://maven.aliyun.com/nexus/content/groups/public/'
    }
}

dependencies {
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.2.0'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.2.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common
    implementation 'org.apache.hadoop:hadoop-common:2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs
    implementation 'org.apache.hadoop:hadoop-hdfs:2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client
    implementation 'org.apache.hadoop:hadoop-client:2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core
    implementation 'org.apache.hadoop:hadoop-mapreduce-client-core:2.6.0'
}
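Once the jar is built (for example with gradle jar; under Gradle's default layout it lands in build/libs/), the job can be submitted with hadoop jar <path-to-jar> topk.SubscriptionVolumeTop10; exact paths depend on your environment. One caveat: the output directory /Top10/output2 must not already exist on HDFS, or the job will fail during submission.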