文件格式:SequenceFile
------------------
    1.SequenceFile
        以Key-Value对的形式存储记录。

    2.不是文本文件,是二进制文件。

    3.可切割
        因为有同步点。
        reader.sync(pos);    //定位到pos之后的第一个同步点。
        writer.sync();        //写入同步点

    4.压缩方式
        不压缩
        record压缩            //只压缩value
        块压缩                //将多个record聚合成一个block后再压缩,key和value都会被压缩。

 

package com.it18zhang.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.junit.Test;

import java.io.IOException;

/**
 * Exploratory JUnit tests for Hadoop {@link SequenceFile}: writing records with
 * sync marks (with and without compression) and reading them back sequentially,
 * by byte offset, and by sync mark.
 *
 * <p>All tests operate on the same local file ({@link #SEQ_FILE}); the read tests
 * expect one of the write tests to have been run first.
 */
public class TestSeqFile {
    /** Location of the sequence file shared by every test in this class. */
    private static final String SEQ_FILE = "d:/seq/1.seq";

    /**
     * Builds a configuration whose default file system is the local one.
     *
     * @return a fresh configuration with {@code fs.defaultFS} set to {@code file:///}
     */
    private static Configuration localConf() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS","file:///");
        return conf;
    }

    /**
     * Appends the sample data set used by the write tests: keys 0..9 with values
     * "tom0".."tom9" written twice. The first pass adds a sync mark after every
     * record, the second pass only after records with an even key.
     *
     * <p>Sync marks let a reader starting at an arbitrary position find the next
     * record boundary.
     *
     * @param writer open writer to append to; the caller remains responsible for closing it
     * @throws IOException if appending a record or writing a sync mark fails
     */
    private static void appendSampleRecords(SequenceFile.Writer writer) throws IOException {
        for (int i = 0; i < 10; i++) {
            writer.append(new IntWritable(i), new Text("tom" + i));
            // Add a sync mark after every record.
            writer.sync();
        }
        for (int i = 0; i < 10; i++) {
            writer.append(new IntWritable(i), new Text("tom" + i));
            // Add a sync mark only after even keys.
            if (i % 2 == 0) {
                writer.sync();
            }
        }
    }

    /**
     * Write test: creates the sequence file with the writer's default (no explicit)
     * compression settings and fills it with the sample records.
     *
     * @throws Exception if the file cannot be created or written
     */
    @Test
    public void save() throws Exception {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(SEQ_FILE);
        // try-with-resources closes the writer even when append/sync throws.
        try (SequenceFile.Writer writer =
                     SequenceFile.createWriter(fs, conf, p, IntWritable.class, Text.class)) {
            appendSampleRecords(writer);
        }
    }

    /**
     * Write test: creates the sequence file with BLOCK compression using the gzip
     * codec and fills it with the sample records.
     *
     * <p>NOTE(review): this overwrites the same file as {@link #save()}, so the
     * byte offsets used by {@link #read3()} and {@link #read4()} presumably match
     * only one of the two layouts - confirm which.
     *
     * @throws Exception if the file cannot be created or written
     */
    @Test
    public void zipGzip() throws Exception {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(SEQ_FILE);
        try (SequenceFile.Writer writer = SequenceFile.createWriter(fs,
                conf,
                p,
                IntWritable.class,
                Text.class,
                SequenceFile.CompressionType.BLOCK,
                new GzipCodec())) {
            appendSampleRecords(writer);
        }
    }

    /**
     * Read test: iterates over the whole file and prints every key-value pair.
     *
     * @throws Exception if the file cannot be opened or read
     */
    @Test
    public void read() throws Exception {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(SEQ_FILE);
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf)) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            while (reader.next(key, value)) {
                System.out.println(key.get() + " : " + value.toString());
            }
        }
    }

    /**
     * Read test: advances key by key and fetches the value of the current record
     * separately, printing only the values.
     *
     * @throws Exception if the file cannot be opened or read
     */
    @Test
    public void read2() throws Exception {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(SEQ_FILE);
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf)) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            // next(key) reads only the key; the value is fetched on demand.
            while (reader.next(key)) {
                reader.getCurrentValue(value);
                System.out.println(value.toString());
            }
        }
    }

    /**
     * Read test: seeks to a fixed byte offset and prints the value of the record
     * found there, if any.
     *
     * <p>NOTE(review): offset 288 is assumed to be a record boundary of the file
     * produced by one of the write tests; {@code seek} does not search for a
     * boundary, so any other offset needs {@code sync(long)} instead - confirm.
     *
     * @throws Exception if the file cannot be opened or read
     */
    @Test
    public void read3() throws Exception {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(SEQ_FILE);
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf)) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            reader.seek(288);

            // Print only when a record was actually read (next returns false at end of file).
            if (reader.next(key, value)) {
                System.out.println(value.toString());
            }
        }
    }

    /**
     * Read test working with sync marks: positions the reader via {@code sync(648)}
     * and prints every record from there on, together with the reader position
     * after each record.
     *
     * @throws Exception if the file cannot be opened or read
     */
    @Test
    public void read4() throws Exception {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(SEQ_FILE);
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf)) {
            IntWritable key = new IntWritable();
            Text value = new Text();

            reader.sync(648);
            while (reader.next(key, value)) {
                System.out.println(reader.getPosition() + "   " + key.get() + "-" + value.toString());
            }
        }
    }
}

文件格式:MapFile
--------------------
    1.Key-value
    2.key按升序写入(可重复)。
    3.mapFile对应一个目录,目录下有index和data文件,都是序列文件。
    4.index文件每隔若干条记录(默认128条)保存一个key及其在data文件中的偏移量,相当于划分key区间,用于快速定位。