Details
-
Task
-
Resolution: Done
-
Major
-
7.1.2
-
1
-
CX Sprint 286
Description
Now that Analytics supports reading Parquet data from S3 external storage, we need to ensure that Analytics correctly handles Parquet's specialized (logical) types. More details can be found here:
https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
Below is a change that covers the Parquet types. It is already merged with dev tests, but it also needs to be part of the QE tests. See the change for the queries to be used and the attached data file.
https://review.couchbase.org/c/cbas-core/+/176758
Parquet schema:
message test {
  required boolean boolean_field;
  required int32 int8_field (INTEGER(8,true));
  required int32 int16_field (INTEGER(16,true));
  required int32 int32_field;
  required int64 int64_field;
  required int32 uint8_field (INTEGER(8,false));
  required int32 uint16_field (INTEGER(16,false));
  required int32 uint32_field (INTEGER(32,false));
  required int64 uint64_field (INTEGER(64,false));
  required int64 overflowed_uint64_field (INTEGER(64,false));
  required float float_field;
  required double double_field;
  required int32 decimal32_field (DECIMAL(5, 4));
  required int64 decimal64_field (DECIMAL(12, 9));
  required fixed_len_byte_array(10) decimal_fixed80_field (DECIMAL(22,21));
  required binary decimal_arbitrary_length_field (DECIMAL(22,21));
  required binary binary_field;
  required binary string_field (UTF8);
  required binary enum_field (ENUM);
  required binary json_field (JSON);
  required int32 date_field (DATE);
  required int32 time32_millis_field (TIME(MILLIS, true));
  required int64 time64_micros_field (TIME(MICROS, true));
  required int64 time64_nanos_field (TIME(NANOS, true));
  required int32 time32_millis_pst_field (TIME(MILLIS, false));
  required int64 time64_micros_pst_field (TIME(MICROS, false));
  required int64 time64_nanos_pst_field (TIME(NANOS, false));
  required int64 timestamp64_millis_field (TIMESTAMP(MILLIS, true));
  required int64 timestamp64_micros_field (TIMESTAMP(MICROS, true));
  required int64 timestamp64_nanos_field (TIMESTAMP(NANOS, true));
  required int64 timestamp64_millis_pst_field (TIMESTAMP(MILLIS, false));
  required int64 timestamp64_micros_pst_field (TIMESTAMP(MICROS, false));
  required int64 timestamp64_nanos_pst_field (TIMESTAMP(NANOS, false));
  required int96 timestamp96_field;
  required fixed_len_byte_array(16) uuid_field (UUID);
  required group mapField (MAP) {
    repeated group key_value {
      required int32 key;
      required int32 value;
    }
  }
}