package topk;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.Comparator;
import java.util.function.Consumer;
import java.util.stream.StreamSupport;
public class SubscriptionVolumeTop10 {

    // Mapper: parses tab-separated lines of the form <name>\t<count>
    // and emits each as a (Text, LongWritable) pair.
    public static class SubscriptionVolumeTop10Mapper extends
            Mapper<LongWritable, Text, Text, LongWritable> {

        private final Text outKey = new Text();
        private final LongWritable outValue = new LongWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Declaring the checked exceptions lets a failed write fail the
            // task instead of being silently swallowed.
            String[] line = value.toString().split("\t");
            outKey.set(line[0]);
            outValue.set(Long.parseLong(line[1]));
            context.write(outKey, outValue);
        }
    }
    // Reducer: reduce() runs once per key, so this emits the ten largest
    // values for each key.
    public static class SubscriptionVolumeTop10Reducer extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) {
            StreamSupport.stream(values.spliterator(), false)
                    .map(LongWritable::get)            // unbox before sorting: Hadoop reuses the writable
                    .sorted(Comparator.reverseOrder()) // largest first
                    .limit(10)                         // top 10, matching the class name
                    .map(LongWritable::new)
                    .forEach(writeResultToContext(key, context));
        }

        // context.write throws checked exceptions, which a lambda handed to
        // forEach cannot declare, so the call is wrapped in a Consumer.
        Consumer<LongWritable> writeResultToContext(Text key, Context context) {
            return subscriptionVolume -> {
                try {
                    context.write(key, subscriptionVolume);
                } catch (IOException | InterruptedException e) {
                    e.printStackTrace();
                }
            };
        }
    }
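
    // Driver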
    public static void main(String[] args) throws Exception {
        // Running from Windows: point Hadoop at a local installation.
        System.setProperty("hadoop.home.dir", "C:\\hadoop-2.6.0");

        Configuration config = new Configuration();
        config.set("fs.defaultFS", "hdfs://192.168.1.10:8020");

        Job job = Job.getInstance(config, "SubscriptionVolumeTop10");
        job.setJarByClass(SubscriptionVolumeTop10.class);
        job.setMapperClass(SubscriptionVolumeTop10Mapper.class);
        job.setReducerClass(SubscriptionVolumeTop10Reducer.class);
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/Top10/input"));
        FileOutputFormat.setOutputPath(job, new Path("/Top10/output2"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
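For reference, the mapper expects tab-separated input lines of the form name<TAB>count, and since the job runs a single reducer, all results land in one output file. The stream pipeline in the reducer is the heart of the top-N selection; the standalone sketch below shows the same sort-then-limit idea with plain longs instead of Hadoop writables (class name and sample values are hypothetical):

import java.util.Comparator;
import java.util.List;

// Standalone illustration of the reducer's top-N pipeline.
// Class name and sample values are hypothetical.
public class TopNSketch {
    public static void main(String[] args) {
        List<Long> counts = List.of(3L, 42L, 17L, 8L, 99L, 56L);
        counts.stream()
                .sorted(Comparator.reverseOrder()) // largest first
                .limit(10)                         // keep at most ten
                .forEach(System.out::println);     // prints 99, 56, 42, 17, 8, 3
    }
}

The project's build.gradle follows.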
plugins {
    id 'java'
}

group 'peerslee'
version '1.0-SNAPSHOT'
sourceCompatibility = 1.10

repositories {
    maven {
        url 'http://maven.aliyun.com/nexus/content/groups/public/'
    }
}

dependencies {
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.2.0'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.2.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common
    implementation 'org.apache.hadoop:hadoop-common:2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs
    implementation 'org.apache.hadoop:hadoop-hdfs:2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client
    implementation 'org.apache.hadoop:hadoop-client:2.6.0'
    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core
    implementation 'org.apache.hadoop:hadoop-mapreduce-client-core:2.6.0'
}
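Once the jar is built (for example with gradle jar; under Gradle's default layout it lands in build/libs/), the job can be submitted with hadoop jar <path-to-jar> topk.SubscriptionVolumeTop10; exact paths depend on your environment. One caveat: the output directory /Top10/output2 must not already exist on HDFS, or the job will fail during submission.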