Hive 表结构

-- External Hive table named `parquet`, backed by Parquet files under the
-- LOCATION directory below. Being EXTERNAL, the table is declared over an
-- explicit path rather than Hive's default managed location.
-- The eight columns correspond one-to-one, by name, to the fields of the
-- Parquet message schema used by the Java writer later in this note.
CREATE EXTERNAL TABLE parquet (
    id           INT,
    test_int32   INT,
    test_int64   BIGINT,
    test_boolean BOOLEAN,
    test_string  STRING,
    test_float   FLOAT,
    test_double  DOUBLE,
    test_binary  BINARY
)
STORED AS PARQUET
LOCATION '/user/hackcoder/hive_db/db_hackcoder.db/parquet';

写 Parquet 文件

/**
 * Writes one sample record to a Parquet file located inside the directory
 * that backs the Hive external table {@code parquet}.
 *
 * <p>The Parquet message schema declared here uses the same field names as
 * the Hive table columns (id, test_int32, test_int64, test_boolean,
 * test_string, test_float, test_double, test_binary).
 *
 * <p>The writer is managed with try-with-resources so that it is closed
 * even when building or writing the record fails.
 *
 * @throws IOException if the writer cannot be created, or if writing or
 *                     closing it fails
 */
public void parquetWrite() throws IOException {
		// Writer tuning parameters, expressed in bytes.
		int blockSize = 128 * 1024 * 1024;      // 134217728
		int pageSize = 1024 * 1024;             // 1048576
		int dictionaryPageSize = 1024 * 1024;   // 1048576
		boolean enableDictionary = true;
		boolean validating = false;

		String outPath = "/user/hackcoder/hive_db/db_hackcoder.db/parquet/1.parquet";
		MessageType schema = MessageTypeParser.parseMessageType("message parquet {\n" +
				"    required int32 id;\n" +
				"    required int32 test_int32;\n" +
				"    required int64 test_int64;\n" +
				"    required boolean test_boolean;\n" +
				"    required binary test_string (UTF8);\n" +
				"    required float test_float;\n" +
				"    required double test_double;\n" +
				"    required binary test_binary;\n" +
				"}");
		GroupFactory factory = new SimpleGroupFactory(schema);
		GroupWriteSupport writeSupport = new GroupWriteSupport();
		// NOTE(review): `conf` is defined outside this method; presumably a
		// Hadoop Configuration field of the enclosing class - confirm.
		writeSupport.setSchema(schema, conf);

		// try-with-resources guarantees close() runs on every exit path, so a
		// failure while building or writing the record no longer leaks the writer.
		try (ParquetWriter<Group> writer = new ParquetWriter<Group>(
				new Path(outPath),
				writeSupport,
				CompressionCodecName.UNCOMPRESSED,
				blockSize, pageSize, dictionaryPageSize,
				enableDictionary,
				validating,
				ParquetProperties.WriterVersion.PARQUET_2_0,
				conf)) {
			// One value per schema field, appended by field name.
			Group group = factory.newGroup()
					.append("id", 1)
					.append("test_int32", 2)
					.append("test_int64", 3L)
					.append("test_boolean", true)
					.append("test_string", "zl")
					.append("test_float", 1.1F)
					.append("test_double", 2.2D)
					.append("test_binary", Binary.fromString("中国"));
			writer.write(group);
		}
	}