继上一篇博文《ORC文件读取Java代码实现汇总(开发笔记)》之后,本文给出ORC文件写入的Java实现,我们直接上代码:
package com.lu.Main;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import io.airlift.compress.lzo.LzoCodec;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.BasicConfigurator;
public class TestOrcWriter {
public static void main(String[] args) throws Exception {
BasicConfigurator.configure(); //自动快速地使用缺省Log4j环境
JobConf conf = new JobConf();
FileSystem fs = FileSystem.get(conf);
Path outputPath = new Path("/usr/local/orcoutput/112.orc");
StructObjectInspector inspector =
(StructObjectInspector) ObjectInspectorFactory
.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
OrcSerde serde = new OrcSerde();
OutputFormat outFormat = new OrcOutputFormat();
RecordWriter writer = outFormat.getRecordWriter(fs, conf,
outputPath.toString(), Reporter.NULL);
writer.write(NullWritable.get(),
serde.serialize(new MyRow("张三",20), inspector));
writer.write(NullWritable.get(),
serde.serialize(new MyRow("李四",22), inspector));
writer.write(NullWritable.get(),
serde.serialize(new MyRow("王五",30), inspector));
writer.close(Reporter.NULL);
fs.close();
System.out.println("write success .");
// LzoCodec lzoCodec = new LzoCodec();
// ZlibCodec zlibCodec = new ZlibCodec();
// SnappyCodec snappyCodec = new SnappyCodec();
}
static class MyRow implements Writable {
String name;
int age;
MyRow(String name,int age){
this.name = name;
this.age = age;
}
public void readFields(DataInput arg0) throws IOException {
throw new UnsupportedOperationException("no write");
}
public void write(DataOutput arg0) throws IOException {
throw new UnsupportedOperationException("no read");
}
}
}
运行结果:
2022-03-19 10:51:36,195 INFO [org.apache.orc.impl.PhysicalFsWriter] - ORC writer created for path: /usr/local/orcoutput/112.orc with stripeSize: 67108864 blockSize: 268435456 compression: ZLIB bufferSize: 262144
0 [main] INFO org.apache.orc.impl.PhysicalFsWriter - ORC writer created for path: /usr/local/orcoutput/112.orc with stripeSize: 67108864 blockSize: 268435456 compression: ZLIB bufferSize: 262144
2022-03-19 10:51:36,233 INFO [org.apache.orc.impl.WriterImpl] - ORC writer created for path: /usr/local/orcoutput/112.orc with stripeSize: 67108864 blockSize: 268435456 compression: ZLIB bufferSize: 262144
38 [main] INFO org.apache.orc.impl.WriterImpl - ORC writer created for path: /usr/local/orcoutput/112.orc with stripeSize: 67108864 blockSize: 268435456 compression: ZLIB bufferSize: 262144
write success .