package topk;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Comparator;
import java.util.function.Consumer;
import java.util.stream.StreamSupport;

public class SubscriptionVolumeTop10 {

    // Mapper: split each tab-separated input line into (key, subscription volume)
    // and emit the pair unchanged.
    public static class SubscriptionVolumeTop10Mapper extends
            Mapper<LongWritable, Text, Text, LongWritable> {

        private final Text outKey = new Text();
        private final LongWritable outValue = new LongWritable();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Declare the checked exceptions instead of swallowing them with
            // printStackTrace(), so a failed write actually fails the task.
            String[] line = value.toString().split("\t");
            outKey.set(line[0]);
            outValue.set(Long.parseLong(line[1]));
            context.write(outKey, outValue);
        }
    }
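
    // A worked example (the field values are hypothetical): given the
    // tab-separated input line
    //   "alice\t42"
    // the mapper emits the pair (alice, 42).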

    // Reducer: for each key, sort its values in descending order and keep
    // only the ten largest.
    public static class SubscriptionVolumeTop10Reducer extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        public void reduce(Text key, Iterable<LongWritable> values, Context context) {
            StreamSupport.stream(values.spliterator(), false)
                    .map(LongWritable::get)            // unwrap to long
                    .sorted(Comparator.reverseOrder()) // largest first
                    .limit(10)                         // top 10, matching the class name
                    .map(LongWritable::new)
                    .forEach(writeResultToContext(key, context));
        }

        private Consumer<LongWritable> writeResultToContext(Text key, Context context) {
            return subscriptionVolume -> {
                try {
                    context.write(key, subscriptionVolume);
                } catch (IOException | InterruptedException e) {
                    // Rethrow unchecked so the task fails instead of silently
                    // swallowing the error inside the lambda.
                    throw new RuntimeException(e);
                }
            };
        }
    }
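
    // A worked example of the reducer, on hypothetical values for one key:
    //   input : (alice, [3, 12, 9, 1, 20, 7, 15, 5, 8, 11, 2])
    //   output: (alice, 20), (alice, 15), (alice, 12), ... down to the
    //           ten largest values.
    // Because grouping is by key, this is a top 10 per key; a global top 10
    // across all keys would need a different keying scheme.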

    public static void main(String[] args) throws Exception {
        // Local Hadoop installation used for the client-side binaries on Windows.
        System.setProperty("hadoop.home.dir", "C:\\hadoop-2.6.0");

        Configuration config = new Configuration();
        config.set("fs.defaultFS", "hdfs://192.168.1.10:8020");

        Job job = Job.getInstance(config);
        job.setJarByClass(SubscriptionVolumeTop10.class);
        job.setMapperClass(SubscriptionVolumeTop10Mapper.class);
        job.setReducerClass(SubscriptionVolumeTop10Reducer.class);

        // A single reduce task routes every key through one reducer and
        // produces a single output file.
        job.setNumReduceTasks(1);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, new Path("/Top10/input"));
        FileOutputFormat.setOutputPath(job, new Path("/Top10/output2"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
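
// build.gradle for the job above.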
plugins {
    id 'java'
}

group 'peerslee'
version '1.0-SNAPSHOT'

sourceCompatibility = 1.10

repositories {
    maven {
        url 'http://maven.aliyun.com/nexus/content/groups/public/'
    }
}

dependencies {
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.2.0'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.2.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common
    implementation group: 'org.apache.hadoop', name: 'hadoop-common', version: '2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs
    implementation group: 'org.apache.hadoop', name: 'hadoop-hdfs', version: '2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client
    implementation group: 'org.apache.hadoop', name: 'hadoop-client', version: '2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core
    implementation group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '2.6.0'
}
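
// A sketch of how to package and submit the job; the jar name is an
// assumption (Gradle derives it from the project name and version):
//   gradle build
//   hadoop jar build/libs/<project>-1.0-SNAPSHOT.jar topk.SubscriptionVolumeTop10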