Hive table schema
CREATE EXTERNAL TABLE parquet(
  id           int,
  test_int32   int,
  test_int64   bigint,
  test_boolean boolean,
  test_string  string,
  test_float   float,
  test_double  double,
  test_binary  binary
)
STORED AS PARQUET
LOCATION '/user/hackcoder/hive_db/db_hackcoder.db/parquet';
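Each Hive column must line up with a Parquet primitive type in the file's schema: int maps to int32, bigint to int64, string to binary annotated with UTF8, and binary to plain binary. Once a file has been written into the table's LOCATION (see parquetWrite below), you can sanity-check the mapping by dumping the file footer and comparing its schema with the DDL. A minimal sketch using the classic parquet-mr footer API, assuming the same file path the writer below uses:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;

public void printFileSchema() throws java.io.IOException {
    // Hypothetical: the path of the file produced by parquetWrite() below.
    Path file = new Path("/user/hackcoder/hive_db/db_hackcoder.db/parquet/1.parquet");
    ParquetMetadata footer = ParquetFileReader.readFooter(new Configuration(), file);
    // Should print the same message type that the writer declares.
    System.out.println(footer.getFileMetaData().getSchema());
}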
Writing a Parquet file
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.GroupFactory;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public void parquetWrite() throws IOException {
    int blockSize = 134217728;        // row group size: 128 MB
    int pageSize = 1048576;           // page size: 1 MB
    int dictionaryPageSize = 1048576; // dictionary page size: 1 MB
    boolean enableDictionary = true;
    boolean validating = false;
    String outPath = "/user/hackcoder/hive_db/db_hackcoder.db/parquet/1.parquet";

    // The Parquet schema must match the Hive DDL column by column:
    // Hive int -> int32, bigint -> int64, string -> binary (UTF8), binary -> binary.
    MessageType schema = MessageTypeParser.parseMessageType("message parquet {\n" +
            "  required int32 id;\n" +
            "  required int32 test_int32;\n" +
            "  required int64 test_int64;\n" +
            "  required boolean test_boolean;\n" +
            "  required binary test_string (UTF8);\n" +
            "  required float test_float;\n" +
            "  required double test_double;\n" +
            "  required binary test_binary;\n" +
            "}");

    Configuration conf = new Configuration();
    // setSchema is a static method: it stores the schema in the Configuration,
    // where GroupWriteSupport picks it up at write time.
    GroupWriteSupport.setSchema(schema, conf);
    GroupWriteSupport writeSupport = new GroupWriteSupport();
    GroupFactory factory = new SimpleGroupFactory(schema);

    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            new Path(outPath),
            writeSupport,
            CompressionCodecName.UNCOMPRESSED,
            blockSize, pageSize, dictionaryPageSize,
            enableDictionary,
            validating,
            ParquetProperties.WriterVersion.PARQUET_2_0,
            conf);

    // One row; every field is declared "required", so all eight must be appended.
    Group group = factory.newGroup()
            .append("id", 1)
            .append("test_int32", 2)
            .append("test_int64", 3L)
            .append("test_boolean", true)
            .append("test_string", "zl")
            .append("test_float", 1.1F)
            .append("test_double", 2.2D)
            .append("test_binary", Binary.fromString("中国"));

    writer.write(group);
    writer.close(); // flushes the row group and writes the footer
}
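To verify the row round-trips, read the file back with the matching GroupReadSupport. A minimal sketch under the same assumptions (classic parquet-mr example API, same file path):

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public void parquetRead() throws java.io.IOException {
    Path file = new Path("/user/hackcoder/hive_db/db_hackcoder.db/parquet/1.parquet");
    ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).build();
    Group row;
    while ((row = reader.read()) != null) { // read() returns null at end of file
        System.out.println(row.getInteger("id", 0) + "\t" + row.getString("test_string", 0));
    }
    reader.close();
}

Once the file is in place, SELECT * FROM parquet in Hive should return the same row. Note that on newer parquet-mr releases the ten-argument ParquetWriter constructor used above is deprecated in favor of the builder API (for example, ExampleParquetWriter.builder(...)).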