package MyOutPutFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

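/**
 * MapReduce job that uses a custom FileOutputFormat to split access-log
 * records into separate files by phone-number prefix (13*, 15*, 18*, other).
 * Assumes each input line is tab-separated with the phone number in the
 * second field (index 1).
 */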
public class reduce {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String input = "data/access.log";
        String output = "out";

        final Configuration conf = new Configuration();
        final Job job = Job.getInstance(conf);

        // Set the driver class
        job.setJarByClass(reduce.class);

        // Set the Mapper and Reducer classes
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // Set the key and value types of the Mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Set the key and value types of the Reducer (final) output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Use the custom OutputFormat instead of the default TextOutputFormat
        job.setOutputFormatClass(MyOutputFormat.class);

        // Delete the output directory if it already exists, otherwise the job fails.
        // Use the Hadoop FileSystem API so this also works on HDFS, not only on the local filesystem.
        Path outDir = FileOutputFormat.getOutputPath(job);
        FileSystem fs = outDir.getFileSystem(job.getConfiguration());
        if (fs.exists(outDir)) {
            fs.delete(outDir, true);
        }

        // Submit the job and wait for it to finish
        final boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
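
    // Mapper: emits each input line unchanged as the key, with a NullWritable value,
    // so the shuffle groups identical lines together.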
    public static class MyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }
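
    // Reducer: writes each distinct line exactly once; the routing into
    // per-prefix files happens in the custom OutputFormat below.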
    public static class MyReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }
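
    // Custom OutputFormat: instead of a single part-r-xxxxx file, the RecordWriter
    // routes each record into 13.log, 15.log, 18.log or 99.log under the "out"
    // directory, based on the phone-number prefix.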
    public static class MyOutputFormat extends FileOutputFormat<Text, NullWritable> {
        @Override
        public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
            FileSystem fileSystem = FileSystem.get(job.getConfiguration());

            // One output stream per phone-number prefix; paths are hard-coded
            // to match the job's "out" output directory.
            FSDataOutputStream out13 = fileSystem.create(new Path("out/13.log"));
            FSDataOutputStream out15 = fileSystem.create(new Path("out/15.log"));
            FSDataOutputStream out18 = fileSystem.create(new Path("out/18.log"));
            FSDataOutputStream otherOut = fileSystem.create(new Path("out/99.log"));

            return new RecordWriter<Text, NullWritable>() {
                @Override
                public void write(Text key, NullWritable value) throws IOException, InterruptedException {
                    String line = key.toString();
                    String[] fields = line.split("\t");
                    // Guard against malformed lines that have no second field
                    String phone = fields.length > 1 ? fields[1].trim() : "";

                    // Pick the output stream by phone-number prefix
                    FSDataOutputStream target;
                    if (phone.startsWith("13")) {
                        target = out13;
                    } else if (phone.startsWith("15")) {
                        target = out15;
                    } else if (phone.startsWith("18")) {
                        target = out18;
                    } else {
                        target = otherOut;
                    }
                    target.write(line.getBytes());
                    target.write("\n".getBytes());
                    target.flush();
                }

                @Override
                public void close(TaskAttemptContext context) throws IOException, InterruptedException {
                    out13.close();
                    out15.close();
                    out18.close();
                    otherOut.close();
                    // Do not close fileSystem here: FileSystem.get() returns a cached,
                    // shared instance that the framework may still need.
                }
            };
        }
    }
}
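
// Example usage (a sketch; the jar name and client setup are assumptions):
//   hadoop jar myjob.jar MyOutPutFormat.reduce
// With data/access.log as input, records whose second tab-separated field starts
// with 13, 15 or 18 end up in out/13.log, out/15.log and out/18.log respectively;
// everything else goes to out/99.log.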