文件格式:SequenceFile
------------------
1.SequenceFile
Key-Value对方式。
2.不是文本文件,是二进制文件。
3.可切割
因为有同步点。
reader.sync(pos); //定位到pos之后的第一个同步点。
writer.sync(); //写入同步点
4.压缩方式
不压缩
record压缩 //只压缩value
块压缩 //将多个record聚集成一个block后整体压缩(key和value都会被压缩)。
package com.it18zhang.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.junit.Test;
import java.io.IOException;
/**
 * Demonstrates writing and reading a Hadoop {@link SequenceFile} on the local
 * file system: plain and block-compressed writes, sequential reads, and
 * positioning the reader with {@code seek} and {@code sync}.
 *
 * <p>All tests share one file ({@link #SEQ_FILE}); run one of the write tests
 * before any of the read tests.
 */
public class TestSeqFile {

    /** Location of the sequence file shared by every test in this class. */
    private static final String SEQ_FILE = "d:/seq/1.seq";

    /** Number of records appended by each of the two write passes. */
    private static final int RECORD_COUNT = 10;

    /**
     * Creates a configuration whose default file system is the local one.
     *
     * @return a fresh configuration with {@code fs.defaultFS} set to {@code file:///}
     */
    private static Configuration localConf() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        return conf;
    }

    /**
     * Opens a reader on {@link #SEQ_FILE}. The caller owns the reader and must close it.
     *
     * @return a reader positioned at the first record
     * @throws IOException if the file cannot be opened
     */
    private static SequenceFile.Reader openReader() throws IOException {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        return new SequenceFile.Reader(fs, new Path(SEQ_FILE), conf);
    }

    /**
     * Appends the sample data set: keys 0..9 with a sync marker after every
     * record, then keys 0..9 again with a sync marker only after even keys.
     *
     * <p>Sync markers let a reader that starts at an arbitrary offset find the
     * next record boundary.
     *
     * @param writer the open writer to append to; not closed by this method
     * @throws IOException if appending or writing a sync marker fails
     */
    private static void writeSampleRecords(SequenceFile.Writer writer) throws IOException {
        // First pass: a sync marker follows every single record.
        for (int i = 0; i < RECORD_COUNT; i++) {
            writer.append(new IntWritable(i), new Text("tom" + i));
            writer.sync();
        }
        // Second pass: a sync marker follows only the even-keyed records.
        for (int i = 0; i < RECORD_COUNT; i++) {
            writer.append(new IntWritable(i), new Text("tom" + i));
            if (i % 2 == 0) {
                writer.sync();
            }
        }
    }

    /**
     * Writes the sample records to an uncompressed sequence file.
     *
     * @throws Exception if the file cannot be created or written
     */
    @Test
    public void save() throws Exception {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        // try-with-resources closes the writer even when an append fails.
        try (SequenceFile.Writer writer = SequenceFile.createWriter(
                fs, conf, new Path(SEQ_FILE), IntWritable.class, Text.class)) {
            writeSampleRecords(writer);
        }
    }

    /**
     * Writes the sample records to a block-compressed sequence file using gzip.
     *
     * @throws Exception if the file cannot be created or written
     */
    @Test
    public void zipGzip() throws Exception {
        Configuration conf = localConf();
        FileSystem fs = FileSystem.get(conf);
        // The codec is Configurable; give it the configuration before use so it
        // never operates on a null Configuration.
        GzipCodec codec = new GzipCodec();
        codec.setConf(conf);
        // NOTE(review): writing a sync marker after each record presumably ends
        // the current compression block early - confirm against the Hadoop docs.
        try (SequenceFile.Writer writer = SequenceFile.createWriter(
                fs,
                conf,
                new Path(SEQ_FILE),
                IntWritable.class,
                Text.class,
                SequenceFile.CompressionType.BLOCK,
                codec)) {
            writeSampleRecords(writer);
        }
    }

    /**
     * Reads the file sequentially and prints every key-value pair.
     *
     * @throws Exception if the file cannot be opened or read
     */
    @Test
    public void read() throws Exception {
        try (SequenceFile.Reader reader = openReader()) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            while (reader.next(key, value)) {
                System.out.println(key.get() + " : " + value.toString());
            }
        }
    }

    /**
     * Reads the file key by key, fetching the current value separately, and
     * prints every value.
     *
     * @throws Exception if the file cannot be opened or read
     */
    @Test
    public void read2() throws Exception {
        try (SequenceFile.Reader reader = openReader()) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            while (reader.next(key)) {
                reader.getCurrentValue(value);
                System.out.println(value.toString());
            }
        }
    }

    /**
     * Seeks to a fixed byte offset and prints the value of the record found there.
     *
     * @throws Exception if the file cannot be opened, positioned, or read
     */
    @Test
    public void read3() throws Exception {
        try (SequenceFile.Reader reader = openReader()) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            // NOTE(review): 288 is assumed to be the start of a record in the
            // file produced by save(); seek() does not search for a boundary.
            reader.seek(288);
            // Only print when a record was actually read.
            if (reader.next(key, value)) {
                System.out.println(value.toString());
            }
        }
    }

    /**
     * Positions the reader using a sync marker, then prints the position, key
     * and value of every remaining record.
     *
     * @throws Exception if the file cannot be opened, positioned, or read
     */
    @Test
    public void read4() throws Exception {
        try (SequenceFile.Reader reader = openReader()) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            // Move to the first sync point after byte offset 648.
            reader.sync(648);
            while (reader.next(key, value)) {
                System.out.println(reader.getPosition() + " " + key.get() + "-" + value.toString());
            }
        }
    }
}
文件格式:MapFile
--------------------
1.Key-value
2.key按升序写入(可重复)。
3.MapFile对应一个目录,目录下有index和data两个文件,二者都是序列文件(SequenceFile)。
4.index文件划分key区间,用于快速定位。