diff --git a/Encodings.md b/Encodings.md
index 954c32fae..51c6d8060 100644
--- a/Encodings.md
+++ b/Encodings.md
@@ -335,7 +335,7 @@ Note that, even for FIXED_LEN_BYTE_ARRAY, all lengths are encoded despite the re
 
 ### Byte Stream Split: (BYTE_STREAM_SPLIT = 9)
 
-Supported Types: INT32, INT64, FLOAT, DOUBLE, FIXED_LEN_BYTE_ARRAY
+Supported Types: FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY
 
 This encoding does not reduce the size of the data but can lead to a significantly better
 compression ratio and speed when a compression algorithm is used afterwards.
diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift
index ce4f35dd0..27d404374 100644
--- a/src/main/thrift/parquet.thrift
+++ b/src/main/thrift/parquet.thrift
@@ -526,12 +526,15 @@ enum Encoding {
    */
   RLE_DICTIONARY = 8;
 
-  /** Encoding for fixed-width data (INT32, INT64, FLOAT, DOUBLE, FIXED_LEN_BYTE_ARRAY).
+  /** Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY).
       K byte-streams are created where K is the size in bytes of the data type.
       The individual bytes of a value are scattered to the corresponding stream and
       the streams are concatenated.
       This itself does not reduce the size of the data but can lead to better compression
       afterwards.
+
+      Added in 2.8 for FLOAT and DOUBLE.
+      Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11.
    */
   BYTE_STREAM_SPLIT = 9;
 }