From 34332178542203a4f1125898c303cc30bc755f28 Mon Sep 17 00:00:00 2001 From: benibus Date: Wed, 14 Jun 2023 14:48:45 -0400 Subject: [PATCH 01/37] Regenerate thrift headers --- cpp/src/generated/parquet_types.cpp | 2267 ++++++++++++++------------- cpp/src/generated/parquet_types.h | 45 +- cpp/src/parquet/parquet.thrift | 2 + 3 files changed, 1229 insertions(+), 1085 deletions(-) diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index f4e378fd3822a..86188581e0c42 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -1288,6 +1288,81 @@ void DateType::printTo(std::ostream& out) const { } +Float16Type::~Float16Type() noexcept { +} + +std::ostream& operator<<(std::ostream& out, const Float16Type& obj) +{ + obj.printTo(out); + return out; +} + + +uint32_t Float16Type::read(::apache::thrift::protocol::TProtocol* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +uint32_t Float16Type::write(::apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Float16Type"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +void swap(Float16Type &a, Float16Type &b) { + using ::std::swap; + (void) a; + (void) b; +} + +Float16Type::Float16Type(const Float16Type& other28) noexcept { + (void) other28; +} +Float16Type::Float16Type(Float16Type&& other29) noexcept { 
+ (void) other29; +} +Float16Type& Float16Type::operator=(const Float16Type& other30) noexcept { + (void) other30; + return *this; +} +Float16Type& Float16Type::operator=(Float16Type&& other31) noexcept { + (void) other31; + return *this; +} +void Float16Type::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "Float16Type("; + out << ")"; +} + + NullType::~NullType() noexcept { } @@ -1342,18 +1417,18 @@ void swap(NullType &a, NullType &b) { (void) b; } -NullType::NullType(const NullType& other28) noexcept { - (void) other28; +NullType::NullType(const NullType& other32) noexcept { + (void) other32; } -NullType::NullType(NullType&& other29) noexcept { - (void) other29; +NullType::NullType(NullType&& other33) noexcept { + (void) other33; } -NullType& NullType::operator=(const NullType& other30) noexcept { - (void) other30; +NullType& NullType::operator=(const NullType& other34) noexcept { + (void) other34; return *this; } -NullType& NullType::operator=(NullType&& other31) noexcept { - (void) other31; +NullType& NullType::operator=(NullType&& other35) noexcept { + (void) other35; return *this; } void NullType::printTo(std::ostream& out) const { @@ -1460,22 +1535,22 @@ void swap(DecimalType &a, DecimalType &b) { swap(a.precision, b.precision); } -DecimalType::DecimalType(const DecimalType& other32) noexcept { - scale = other32.scale; - precision = other32.precision; +DecimalType::DecimalType(const DecimalType& other36) noexcept { + scale = other36.scale; + precision = other36.precision; } -DecimalType::DecimalType(DecimalType&& other33) noexcept { - scale = other33.scale; - precision = other33.precision; +DecimalType::DecimalType(DecimalType&& other37) noexcept { + scale = other37.scale; + precision = other37.precision; } -DecimalType& DecimalType::operator=(const DecimalType& other34) noexcept { - scale = other34.scale; - precision = other34.precision; +DecimalType& DecimalType::operator=(const DecimalType& other38) noexcept { + scale = 
other38.scale; + precision = other38.precision; return *this; } -DecimalType& DecimalType::operator=(DecimalType&& other35) noexcept { - scale = other35.scale; - precision = other35.precision; +DecimalType& DecimalType::operator=(DecimalType&& other39) noexcept { + scale = other39.scale; + precision = other39.precision; return *this; } void DecimalType::printTo(std::ostream& out) const { @@ -1541,18 +1616,18 @@ void swap(MilliSeconds &a, MilliSeconds &b) { (void) b; } -MilliSeconds::MilliSeconds(const MilliSeconds& other36) noexcept { - (void) other36; +MilliSeconds::MilliSeconds(const MilliSeconds& other40) noexcept { + (void) other40; } -MilliSeconds::MilliSeconds(MilliSeconds&& other37) noexcept { - (void) other37; +MilliSeconds::MilliSeconds(MilliSeconds&& other41) noexcept { + (void) other41; } -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other38) noexcept { - (void) other38; +MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other42) noexcept { + (void) other42; return *this; } -MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other39) noexcept { - (void) other39; +MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other43) noexcept { + (void) other43; return *this; } void MilliSeconds::printTo(std::ostream& out) const { @@ -1616,18 +1691,18 @@ void swap(MicroSeconds &a, MicroSeconds &b) { (void) b; } -MicroSeconds::MicroSeconds(const MicroSeconds& other40) noexcept { - (void) other40; +MicroSeconds::MicroSeconds(const MicroSeconds& other44) noexcept { + (void) other44; } -MicroSeconds::MicroSeconds(MicroSeconds&& other41) noexcept { - (void) other41; +MicroSeconds::MicroSeconds(MicroSeconds&& other45) noexcept { + (void) other45; } -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other42) noexcept { - (void) other42; +MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other46) noexcept { + (void) other46; return *this; } -MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other43) noexcept { - (void) other43; 
+MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other47) noexcept { + (void) other47; return *this; } void MicroSeconds::printTo(std::ostream& out) const { @@ -1691,18 +1766,18 @@ void swap(NanoSeconds &a, NanoSeconds &b) { (void) b; } -NanoSeconds::NanoSeconds(const NanoSeconds& other44) noexcept { - (void) other44; +NanoSeconds::NanoSeconds(const NanoSeconds& other48) noexcept { + (void) other48; } -NanoSeconds::NanoSeconds(NanoSeconds&& other45) noexcept { - (void) other45; +NanoSeconds::NanoSeconds(NanoSeconds&& other49) noexcept { + (void) other49; } -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other46) noexcept { - (void) other46; +NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other50) noexcept { + (void) other50; return *this; } -NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other47) noexcept { - (void) other47; +NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other51) noexcept { + (void) other51; return *this; } void NanoSeconds::printTo(std::ostream& out) const { @@ -1827,30 +1902,30 @@ void swap(TimeUnit &a, TimeUnit &b) { swap(a.__isset, b.__isset); } -TimeUnit::TimeUnit(const TimeUnit& other48) noexcept { - MILLIS = other48.MILLIS; - MICROS = other48.MICROS; - NANOS = other48.NANOS; - __isset = other48.__isset; +TimeUnit::TimeUnit(const TimeUnit& other52) noexcept { + MILLIS = other52.MILLIS; + MICROS = other52.MICROS; + NANOS = other52.NANOS; + __isset = other52.__isset; } -TimeUnit::TimeUnit(TimeUnit&& other49) noexcept { - MILLIS = std::move(other49.MILLIS); - MICROS = std::move(other49.MICROS); - NANOS = std::move(other49.NANOS); - __isset = other49.__isset; +TimeUnit::TimeUnit(TimeUnit&& other53) noexcept { + MILLIS = std::move(other53.MILLIS); + MICROS = std::move(other53.MICROS); + NANOS = std::move(other53.NANOS); + __isset = other53.__isset; } -TimeUnit& TimeUnit::operator=(const TimeUnit& other50) noexcept { - MILLIS = other50.MILLIS; - MICROS = other50.MICROS; - NANOS = other50.NANOS; - __isset = 
other50.__isset; +TimeUnit& TimeUnit::operator=(const TimeUnit& other54) noexcept { + MILLIS = other54.MILLIS; + MICROS = other54.MICROS; + NANOS = other54.NANOS; + __isset = other54.__isset; return *this; } -TimeUnit& TimeUnit::operator=(TimeUnit&& other51) noexcept { - MILLIS = std::move(other51.MILLIS); - MICROS = std::move(other51.MICROS); - NANOS = std::move(other51.NANOS); - __isset = other51.__isset; +TimeUnit& TimeUnit::operator=(TimeUnit&& other55) noexcept { + MILLIS = std::move(other55.MILLIS); + MICROS = std::move(other55.MICROS); + NANOS = std::move(other55.NANOS); + __isset = other55.__isset; return *this; } void TimeUnit::printTo(std::ostream& out) const { @@ -1960,22 +2035,22 @@ void swap(TimestampType &a, TimestampType &b) { swap(a.unit, b.unit); } -TimestampType::TimestampType(const TimestampType& other52) noexcept { - isAdjustedToUTC = other52.isAdjustedToUTC; - unit = other52.unit; +TimestampType::TimestampType(const TimestampType& other56) noexcept { + isAdjustedToUTC = other56.isAdjustedToUTC; + unit = other56.unit; } -TimestampType::TimestampType(TimestampType&& other53) noexcept { - isAdjustedToUTC = other53.isAdjustedToUTC; - unit = std::move(other53.unit); +TimestampType::TimestampType(TimestampType&& other57) noexcept { + isAdjustedToUTC = other57.isAdjustedToUTC; + unit = std::move(other57.unit); } -TimestampType& TimestampType::operator=(const TimestampType& other54) noexcept { - isAdjustedToUTC = other54.isAdjustedToUTC; - unit = other54.unit; +TimestampType& TimestampType::operator=(const TimestampType& other58) noexcept { + isAdjustedToUTC = other58.isAdjustedToUTC; + unit = other58.unit; return *this; } -TimestampType& TimestampType::operator=(TimestampType&& other55) noexcept { - isAdjustedToUTC = other55.isAdjustedToUTC; - unit = std::move(other55.unit); +TimestampType& TimestampType::operator=(TimestampType&& other59) noexcept { + isAdjustedToUTC = other59.isAdjustedToUTC; + unit = std::move(other59.unit); return *this; } void 
TimestampType::printTo(std::ostream& out) const { @@ -2084,22 +2159,22 @@ void swap(TimeType &a, TimeType &b) { swap(a.unit, b.unit); } -TimeType::TimeType(const TimeType& other56) noexcept { - isAdjustedToUTC = other56.isAdjustedToUTC; - unit = other56.unit; +TimeType::TimeType(const TimeType& other60) noexcept { + isAdjustedToUTC = other60.isAdjustedToUTC; + unit = other60.unit; } -TimeType::TimeType(TimeType&& other57) noexcept { - isAdjustedToUTC = other57.isAdjustedToUTC; - unit = std::move(other57.unit); +TimeType::TimeType(TimeType&& other61) noexcept { + isAdjustedToUTC = other61.isAdjustedToUTC; + unit = std::move(other61.unit); } -TimeType& TimeType::operator=(const TimeType& other58) noexcept { - isAdjustedToUTC = other58.isAdjustedToUTC; - unit = other58.unit; +TimeType& TimeType::operator=(const TimeType& other62) noexcept { + isAdjustedToUTC = other62.isAdjustedToUTC; + unit = other62.unit; return *this; } -TimeType& TimeType::operator=(TimeType&& other59) noexcept { - isAdjustedToUTC = other59.isAdjustedToUTC; - unit = std::move(other59.unit); +TimeType& TimeType::operator=(TimeType&& other63) noexcept { + isAdjustedToUTC = other63.isAdjustedToUTC; + unit = std::move(other63.unit); return *this; } void TimeType::printTo(std::ostream& out) const { @@ -2208,22 +2283,22 @@ void swap(IntType &a, IntType &b) { swap(a.isSigned, b.isSigned); } -IntType::IntType(const IntType& other60) noexcept { - bitWidth = other60.bitWidth; - isSigned = other60.isSigned; +IntType::IntType(const IntType& other64) noexcept { + bitWidth = other64.bitWidth; + isSigned = other64.isSigned; } -IntType::IntType(IntType&& other61) noexcept { - bitWidth = other61.bitWidth; - isSigned = other61.isSigned; +IntType::IntType(IntType&& other65) noexcept { + bitWidth = other65.bitWidth; + isSigned = other65.isSigned; } -IntType& IntType::operator=(const IntType& other62) noexcept { - bitWidth = other62.bitWidth; - isSigned = other62.isSigned; +IntType& IntType::operator=(const IntType& 
other66) noexcept { + bitWidth = other66.bitWidth; + isSigned = other66.isSigned; return *this; } -IntType& IntType::operator=(IntType&& other63) noexcept { - bitWidth = other63.bitWidth; - isSigned = other63.isSigned; +IntType& IntType::operator=(IntType&& other67) noexcept { + bitWidth = other67.bitWidth; + isSigned = other67.isSigned; return *this; } void IntType::printTo(std::ostream& out) const { @@ -2289,18 +2364,18 @@ void swap(JsonType &a, JsonType &b) { (void) b; } -JsonType::JsonType(const JsonType& other64) noexcept { - (void) other64; +JsonType::JsonType(const JsonType& other68) noexcept { + (void) other68; } -JsonType::JsonType(JsonType&& other65) noexcept { - (void) other65; +JsonType::JsonType(JsonType&& other69) noexcept { + (void) other69; } -JsonType& JsonType::operator=(const JsonType& other66) noexcept { - (void) other66; +JsonType& JsonType::operator=(const JsonType& other70) noexcept { + (void) other70; return *this; } -JsonType& JsonType::operator=(JsonType&& other67) noexcept { - (void) other67; +JsonType& JsonType::operator=(JsonType&& other71) noexcept { + (void) other71; return *this; } void JsonType::printTo(std::ostream& out) const { @@ -2364,18 +2439,18 @@ void swap(BsonType &a, BsonType &b) { (void) b; } -BsonType::BsonType(const BsonType& other68) noexcept { - (void) other68; +BsonType::BsonType(const BsonType& other72) noexcept { + (void) other72; } -BsonType::BsonType(BsonType&& other69) noexcept { - (void) other69; +BsonType::BsonType(BsonType&& other73) noexcept { + (void) other73; } -BsonType& BsonType::operator=(const BsonType& other70) noexcept { - (void) other70; +BsonType& BsonType::operator=(const BsonType& other74) noexcept { + (void) other74; return *this; } -BsonType& BsonType::operator=(BsonType&& other71) noexcept { - (void) other71; +BsonType& BsonType::operator=(BsonType&& other75) noexcept { + (void) other75; return *this; } void BsonType::printTo(std::ostream& out) const { @@ -2453,6 +2528,11 @@ void 
LogicalType::__set_UUID(const UUIDType& val) { this->UUID = val; __isset.UUID = true; } + +void LogicalType::__set_FLOAT16(const Float16Type& val) { + this->FLOAT16 = val; +__isset.FLOAT16 = true; +} std::ostream& operator<<(std::ostream& out, const LogicalType& obj) { obj.printTo(out); @@ -2585,6 +2665,14 @@ uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) { xfer += iprot->skip(ftype); } break; + case 15: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->FLOAT16.read(iprot); + this->__isset.FLOAT16 = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -2667,6 +2755,11 @@ uint32_t LogicalType::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += this->UUID.write(oprot); xfer += oprot->writeFieldEnd(); } + if (this->__isset.FLOAT16) { + xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15); + xfer += this->FLOAT16.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -2687,73 +2780,78 @@ void swap(LogicalType &a, LogicalType &b) { swap(a.JSON, b.JSON); swap(a.BSON, b.BSON); swap(a.UUID, b.UUID); + swap(a.FLOAT16, b.FLOAT16); swap(a.__isset, b.__isset); } -LogicalType::LogicalType(const LogicalType& other72) noexcept { - STRING = other72.STRING; - MAP = other72.MAP; - LIST = other72.LIST; - ENUM = other72.ENUM; - DECIMAL = other72.DECIMAL; - DATE = other72.DATE; - TIME = other72.TIME; - TIMESTAMP = other72.TIMESTAMP; - INTEGER = other72.INTEGER; - UNKNOWN = other72.UNKNOWN; - JSON = other72.JSON; - BSON = other72.BSON; - UUID = other72.UUID; - __isset = other72.__isset; -} -LogicalType::LogicalType(LogicalType&& other73) noexcept { - STRING = std::move(other73.STRING); - MAP = std::move(other73.MAP); - LIST = std::move(other73.LIST); - ENUM = std::move(other73.ENUM); - DECIMAL = std::move(other73.DECIMAL); - DATE = std::move(other73.DATE); - TIME = 
std::move(other73.TIME); - TIMESTAMP = std::move(other73.TIMESTAMP); - INTEGER = std::move(other73.INTEGER); - UNKNOWN = std::move(other73.UNKNOWN); - JSON = std::move(other73.JSON); - BSON = std::move(other73.BSON); - UUID = std::move(other73.UUID); - __isset = other73.__isset; -} -LogicalType& LogicalType::operator=(const LogicalType& other74) noexcept { - STRING = other74.STRING; - MAP = other74.MAP; - LIST = other74.LIST; - ENUM = other74.ENUM; - DECIMAL = other74.DECIMAL; - DATE = other74.DATE; - TIME = other74.TIME; - TIMESTAMP = other74.TIMESTAMP; - INTEGER = other74.INTEGER; - UNKNOWN = other74.UNKNOWN; - JSON = other74.JSON; - BSON = other74.BSON; - UUID = other74.UUID; - __isset = other74.__isset; +LogicalType::LogicalType(const LogicalType& other76) noexcept { + STRING = other76.STRING; + MAP = other76.MAP; + LIST = other76.LIST; + ENUM = other76.ENUM; + DECIMAL = other76.DECIMAL; + DATE = other76.DATE; + TIME = other76.TIME; + TIMESTAMP = other76.TIMESTAMP; + INTEGER = other76.INTEGER; + UNKNOWN = other76.UNKNOWN; + JSON = other76.JSON; + BSON = other76.BSON; + UUID = other76.UUID; + FLOAT16 = other76.FLOAT16; + __isset = other76.__isset; +} +LogicalType::LogicalType(LogicalType&& other77) noexcept { + STRING = std::move(other77.STRING); + MAP = std::move(other77.MAP); + LIST = std::move(other77.LIST); + ENUM = std::move(other77.ENUM); + DECIMAL = std::move(other77.DECIMAL); + DATE = std::move(other77.DATE); + TIME = std::move(other77.TIME); + TIMESTAMP = std::move(other77.TIMESTAMP); + INTEGER = std::move(other77.INTEGER); + UNKNOWN = std::move(other77.UNKNOWN); + JSON = std::move(other77.JSON); + BSON = std::move(other77.BSON); + UUID = std::move(other77.UUID); + FLOAT16 = std::move(other77.FLOAT16); + __isset = other77.__isset; +} +LogicalType& LogicalType::operator=(const LogicalType& other78) noexcept { + STRING = other78.STRING; + MAP = other78.MAP; + LIST = other78.LIST; + ENUM = other78.ENUM; + DECIMAL = other78.DECIMAL; + DATE = other78.DATE; + 
TIME = other78.TIME; + TIMESTAMP = other78.TIMESTAMP; + INTEGER = other78.INTEGER; + UNKNOWN = other78.UNKNOWN; + JSON = other78.JSON; + BSON = other78.BSON; + UUID = other78.UUID; + FLOAT16 = other78.FLOAT16; + __isset = other78.__isset; return *this; } -LogicalType& LogicalType::operator=(LogicalType&& other75) noexcept { - STRING = std::move(other75.STRING); - MAP = std::move(other75.MAP); - LIST = std::move(other75.LIST); - ENUM = std::move(other75.ENUM); - DECIMAL = std::move(other75.DECIMAL); - DATE = std::move(other75.DATE); - TIME = std::move(other75.TIME); - TIMESTAMP = std::move(other75.TIMESTAMP); - INTEGER = std::move(other75.INTEGER); - UNKNOWN = std::move(other75.UNKNOWN); - JSON = std::move(other75.JSON); - BSON = std::move(other75.BSON); - UUID = std::move(other75.UUID); - __isset = other75.__isset; +LogicalType& LogicalType::operator=(LogicalType&& other79) noexcept { + STRING = std::move(other79.STRING); + MAP = std::move(other79.MAP); + LIST = std::move(other79.LIST); + ENUM = std::move(other79.ENUM); + DECIMAL = std::move(other79.DECIMAL); + DATE = std::move(other79.DATE); + TIME = std::move(other79.TIME); + TIMESTAMP = std::move(other79.TIMESTAMP); + INTEGER = std::move(other79.INTEGER); + UNKNOWN = std::move(other79.UNKNOWN); + JSON = std::move(other79.JSON); + BSON = std::move(other79.BSON); + UUID = std::move(other79.UUID); + FLOAT16 = std::move(other79.FLOAT16); + __isset = other79.__isset; return *this; } void LogicalType::printTo(std::ostream& out) const { @@ -2772,6 +2870,7 @@ void LogicalType::printTo(std::ostream& out) const { out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "")); out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "")); out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "")); + out << ", " << "FLOAT16="; (__isset.FLOAT16 ? 
(out << to_string(FLOAT16)) : (out << "")); out << ")"; } @@ -2859,9 +2958,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast76; - xfer += iprot->readI32(ecast76); - this->type = static_cast(ecast76); + int32_t ecast80; + xfer += iprot->readI32(ecast80); + this->type = static_cast(ecast80); this->__isset.type = true; } else { xfer += iprot->skip(ftype); @@ -2877,9 +2976,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast77; - xfer += iprot->readI32(ecast77); - this->repetition_type = static_cast(ecast77); + int32_t ecast81; + xfer += iprot->readI32(ecast81); + this->repetition_type = static_cast(ecast81); this->__isset.repetition_type = true; } else { xfer += iprot->skip(ftype); @@ -2903,9 +3002,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 6: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast78; - xfer += iprot->readI32(ecast78); - this->converted_type = static_cast(ecast78); + int32_t ecast82; + xfer += iprot->readI32(ecast82); + this->converted_type = static_cast(ecast82); this->__isset.converted_type = true; } else { xfer += iprot->skip(ftype); @@ -3031,58 +3130,58 @@ void swap(SchemaElement &a, SchemaElement &b) { swap(a.__isset, b.__isset); } -SchemaElement::SchemaElement(const SchemaElement& other79) { - type = other79.type; - type_length = other79.type_length; - repetition_type = other79.repetition_type; - name = other79.name; - num_children = other79.num_children; - converted_type = other79.converted_type; - scale = other79.scale; - precision = other79.precision; - field_id = other79.field_id; - logicalType = other79.logicalType; - __isset = other79.__isset; -} -SchemaElement::SchemaElement(SchemaElement&& other80) noexcept { - type = other80.type; - type_length = 
other80.type_length; - repetition_type = other80.repetition_type; - name = std::move(other80.name); - num_children = other80.num_children; - converted_type = other80.converted_type; - scale = other80.scale; - precision = other80.precision; - field_id = other80.field_id; - logicalType = std::move(other80.logicalType); - __isset = other80.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other81) { - type = other81.type; - type_length = other81.type_length; - repetition_type = other81.repetition_type; - name = other81.name; - num_children = other81.num_children; - converted_type = other81.converted_type; - scale = other81.scale; - precision = other81.precision; - field_id = other81.field_id; - logicalType = other81.logicalType; - __isset = other81.__isset; +SchemaElement::SchemaElement(const SchemaElement& other83) { + type = other83.type; + type_length = other83.type_length; + repetition_type = other83.repetition_type; + name = other83.name; + num_children = other83.num_children; + converted_type = other83.converted_type; + scale = other83.scale; + precision = other83.precision; + field_id = other83.field_id; + logicalType = other83.logicalType; + __isset = other83.__isset; +} +SchemaElement::SchemaElement(SchemaElement&& other84) noexcept { + type = other84.type; + type_length = other84.type_length; + repetition_type = other84.repetition_type; + name = std::move(other84.name); + num_children = other84.num_children; + converted_type = other84.converted_type; + scale = other84.scale; + precision = other84.precision; + field_id = other84.field_id; + logicalType = std::move(other84.logicalType); + __isset = other84.__isset; +} +SchemaElement& SchemaElement::operator=(const SchemaElement& other85) { + type = other85.type; + type_length = other85.type_length; + repetition_type = other85.repetition_type; + name = other85.name; + num_children = other85.num_children; + converted_type = other85.converted_type; + scale = other85.scale; + precision = 
other85.precision; + field_id = other85.field_id; + logicalType = other85.logicalType; + __isset = other85.__isset; return *this; } -SchemaElement& SchemaElement::operator=(SchemaElement&& other82) noexcept { - type = other82.type; - type_length = other82.type_length; - repetition_type = other82.repetition_type; - name = std::move(other82.name); - num_children = other82.num_children; - converted_type = other82.converted_type; - scale = other82.scale; - precision = other82.precision; - field_id = other82.field_id; - logicalType = std::move(other82.logicalType); - __isset = other82.__isset; +SchemaElement& SchemaElement::operator=(SchemaElement&& other86) noexcept { + type = other86.type; + type_length = other86.type_length; + repetition_type = other86.repetition_type; + name = std::move(other86.name); + num_children = other86.num_children; + converted_type = other86.converted_type; + scale = other86.scale; + precision = other86.precision; + field_id = other86.field_id; + logicalType = std::move(other86.logicalType); + __isset = other86.__isset; return *this; } void SchemaElement::printTo(std::ostream& out) const { @@ -3168,9 +3267,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast83; - xfer += iprot->readI32(ecast83); - this->encoding = static_cast(ecast83); + int32_t ecast87; + xfer += iprot->readI32(ecast87); + this->encoding = static_cast(ecast87); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -3178,9 +3277,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast84; - xfer += iprot->readI32(ecast84); - this->definition_level_encoding = static_cast(ecast84); + int32_t ecast88; + xfer += iprot->readI32(ecast88); + this->definition_level_encoding = static_cast(ecast88); isset_definition_level_encoding = true; } else { xfer += 
iprot->skip(ftype); @@ -3188,9 +3287,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast85; - xfer += iprot->readI32(ecast85); - this->repetition_level_encoding = static_cast(ecast85); + int32_t ecast89; + xfer += iprot->readI32(ecast89); + this->repetition_level_encoding = static_cast(ecast89); isset_repetition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -3265,38 +3364,38 @@ void swap(DataPageHeader &a, DataPageHeader &b) { swap(a.__isset, b.__isset); } -DataPageHeader::DataPageHeader(const DataPageHeader& other86) { - num_values = other86.num_values; - encoding = other86.encoding; - definition_level_encoding = other86.definition_level_encoding; - repetition_level_encoding = other86.repetition_level_encoding; - statistics = other86.statistics; - __isset = other86.__isset; -} -DataPageHeader::DataPageHeader(DataPageHeader&& other87) noexcept { - num_values = other87.num_values; - encoding = other87.encoding; - definition_level_encoding = other87.definition_level_encoding; - repetition_level_encoding = other87.repetition_level_encoding; - statistics = std::move(other87.statistics); - __isset = other87.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other88) { - num_values = other88.num_values; - encoding = other88.encoding; - definition_level_encoding = other88.definition_level_encoding; - repetition_level_encoding = other88.repetition_level_encoding; - statistics = other88.statistics; - __isset = other88.__isset; +DataPageHeader::DataPageHeader(const DataPageHeader& other90) { + num_values = other90.num_values; + encoding = other90.encoding; + definition_level_encoding = other90.definition_level_encoding; + repetition_level_encoding = other90.repetition_level_encoding; + statistics = other90.statistics; + __isset = other90.__isset; +} +DataPageHeader::DataPageHeader(DataPageHeader&& other91) noexcept { + 
num_values = other91.num_values; + encoding = other91.encoding; + definition_level_encoding = other91.definition_level_encoding; + repetition_level_encoding = other91.repetition_level_encoding; + statistics = std::move(other91.statistics); + __isset = other91.__isset; +} +DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other92) { + num_values = other92.num_values; + encoding = other92.encoding; + definition_level_encoding = other92.definition_level_encoding; + repetition_level_encoding = other92.repetition_level_encoding; + statistics = other92.statistics; + __isset = other92.__isset; return *this; } -DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other89) noexcept { - num_values = other89.num_values; - encoding = other89.encoding; - definition_level_encoding = other89.definition_level_encoding; - repetition_level_encoding = other89.repetition_level_encoding; - statistics = std::move(other89.statistics); - __isset = other89.__isset; +DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other93) noexcept { + num_values = other93.num_values; + encoding = other93.encoding; + definition_level_encoding = other93.definition_level_encoding; + repetition_level_encoding = other93.repetition_level_encoding; + statistics = std::move(other93.statistics); + __isset = other93.__isset; return *this; } void DataPageHeader::printTo(std::ostream& out) const { @@ -3365,18 +3464,18 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) { (void) b; } -IndexPageHeader::IndexPageHeader(const IndexPageHeader& other90) noexcept { - (void) other90; +IndexPageHeader::IndexPageHeader(const IndexPageHeader& other94) noexcept { + (void) other94; } -IndexPageHeader::IndexPageHeader(IndexPageHeader&& other91) noexcept { - (void) other91; +IndexPageHeader::IndexPageHeader(IndexPageHeader&& other95) noexcept { + (void) other95; } -IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other92) noexcept { - (void) other92; +IndexPageHeader& 
IndexPageHeader::operator=(const IndexPageHeader& other96) noexcept { + (void) other96; return *this; } -IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other93) noexcept { - (void) other93; +IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other97) noexcept { + (void) other97; return *this; } void IndexPageHeader::printTo(std::ostream& out) const { @@ -3442,9 +3541,9 @@ uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast94; - xfer += iprot->readI32(ecast94); - this->encoding = static_cast(ecast94); + int32_t ecast98; + xfer += iprot->readI32(ecast98); + this->encoding = static_cast(ecast98); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -3505,30 +3604,30 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { swap(a.__isset, b.__isset); } -DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other95) noexcept { - num_values = other95.num_values; - encoding = other95.encoding; - is_sorted = other95.is_sorted; - __isset = other95.__isset; +DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other99) noexcept { + num_values = other99.num_values; + encoding = other99.encoding; + is_sorted = other99.is_sorted; + __isset = other99.__isset; } -DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other96) noexcept { - num_values = other96.num_values; - encoding = other96.encoding; - is_sorted = other96.is_sorted; - __isset = other96.__isset; +DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other100) noexcept { + num_values = other100.num_values; + encoding = other100.encoding; + is_sorted = other100.is_sorted; + __isset = other100.__isset; } -DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other97) noexcept { - num_values = other97.num_values; - encoding = other97.encoding; - is_sorted = other97.is_sorted; - 
__isset = other97.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other101) noexcept { + num_values = other101.num_values; + encoding = other101.encoding; + is_sorted = other101.is_sorted; + __isset = other101.__isset; return *this; } -DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other98) noexcept { - num_values = other98.num_values; - encoding = other98.encoding; - is_sorted = other98.is_sorted; - __isset = other98.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other102) noexcept { + num_values = other102.num_values; + encoding = other102.encoding; + is_sorted = other102.is_sorted; + __isset = other102.__isset; return *this; } void DictionaryPageHeader::printTo(std::ostream& out) const { @@ -3638,9 +3737,9 @@ uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast99; - xfer += iprot->readI32(ecast99); - this->encoding = static_cast(ecast99); + int32_t ecast103; + xfer += iprot->readI32(ecast103); + this->encoding = static_cast(ecast103); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -3759,50 +3858,50 @@ void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { swap(a.__isset, b.__isset); } -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other100) { - num_values = other100.num_values; - num_nulls = other100.num_nulls; - num_rows = other100.num_rows; - encoding = other100.encoding; - definition_levels_byte_length = other100.definition_levels_byte_length; - repetition_levels_byte_length = other100.repetition_levels_byte_length; - is_compressed = other100.is_compressed; - statistics = other100.statistics; - __isset = other100.__isset; -} -DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other101) noexcept { - num_values = other101.num_values; - num_nulls = other101.num_nulls; - num_rows = other101.num_rows; - encoding = 
other101.encoding; - definition_levels_byte_length = other101.definition_levels_byte_length; - repetition_levels_byte_length = other101.repetition_levels_byte_length; - is_compressed = other101.is_compressed; - statistics = std::move(other101.statistics); - __isset = other101.__isset; -} -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other102) { - num_values = other102.num_values; - num_nulls = other102.num_nulls; - num_rows = other102.num_rows; - encoding = other102.encoding; - definition_levels_byte_length = other102.definition_levels_byte_length; - repetition_levels_byte_length = other102.repetition_levels_byte_length; - is_compressed = other102.is_compressed; - statistics = other102.statistics; - __isset = other102.__isset; +DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other104) { + num_values = other104.num_values; + num_nulls = other104.num_nulls; + num_rows = other104.num_rows; + encoding = other104.encoding; + definition_levels_byte_length = other104.definition_levels_byte_length; + repetition_levels_byte_length = other104.repetition_levels_byte_length; + is_compressed = other104.is_compressed; + statistics = other104.statistics; + __isset = other104.__isset; +} +DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other105) noexcept { + num_values = other105.num_values; + num_nulls = other105.num_nulls; + num_rows = other105.num_rows; + encoding = other105.encoding; + definition_levels_byte_length = other105.definition_levels_byte_length; + repetition_levels_byte_length = other105.repetition_levels_byte_length; + is_compressed = other105.is_compressed; + statistics = std::move(other105.statistics); + __isset = other105.__isset; +} +DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other106) { + num_values = other106.num_values; + num_nulls = other106.num_nulls; + num_rows = other106.num_rows; + encoding = other106.encoding; + definition_levels_byte_length = other106.definition_levels_byte_length; + 
repetition_levels_byte_length = other106.repetition_levels_byte_length; + is_compressed = other106.is_compressed; + statistics = other106.statistics; + __isset = other106.__isset; return *this; } -DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other103) noexcept { - num_values = other103.num_values; - num_nulls = other103.num_nulls; - num_rows = other103.num_rows; - encoding = other103.encoding; - definition_levels_byte_length = other103.definition_levels_byte_length; - repetition_levels_byte_length = other103.repetition_levels_byte_length; - is_compressed = other103.is_compressed; - statistics = std::move(other103.statistics); - __isset = other103.__isset; +DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other107) noexcept { + num_values = other107.num_values; + num_nulls = other107.num_nulls; + num_rows = other107.num_rows; + encoding = other107.encoding; + definition_levels_byte_length = other107.definition_levels_byte_length; + repetition_levels_byte_length = other107.repetition_levels_byte_length; + is_compressed = other107.is_compressed; + statistics = std::move(other107.statistics); + __isset = other107.__isset; return *this; } void DataPageHeaderV2::printTo(std::ostream& out) const { @@ -3874,18 +3973,18 @@ void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { (void) b; } -SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other104) noexcept { - (void) other104; +SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other108) noexcept { + (void) other108; } -SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other105) noexcept { - (void) other105; +SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other109) noexcept { + (void) other109; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other106) noexcept { - (void) other106; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other110) noexcept { + 
(void) other110; return *this; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other107) noexcept { - (void) other107; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other111) noexcept { + (void) other111; return *this; } void SplitBlockAlgorithm::printTo(std::ostream& out) const { @@ -3972,22 +4071,22 @@ void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { swap(a.__isset, b.__isset); } -BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other108) noexcept { - BLOCK = other108.BLOCK; - __isset = other108.__isset; +BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other112) noexcept { + BLOCK = other112.BLOCK; + __isset = other112.__isset; } -BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other109) noexcept { - BLOCK = std::move(other109.BLOCK); - __isset = other109.__isset; +BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other113) noexcept { + BLOCK = std::move(other113.BLOCK); + __isset = other113.__isset; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other110) noexcept { - BLOCK = other110.BLOCK; - __isset = other110.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other114) noexcept { + BLOCK = other114.BLOCK; + __isset = other114.__isset; return *this; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other111) noexcept { - BLOCK = std::move(other111.BLOCK); - __isset = other111.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other115) noexcept { + BLOCK = std::move(other115.BLOCK); + __isset = other115.__isset; return *this; } void BloomFilterAlgorithm::printTo(std::ostream& out) const { @@ -4052,18 +4151,18 @@ void swap(XxHash &a, XxHash &b) { (void) b; } -XxHash::XxHash(const XxHash& other112) noexcept { - (void) other112; +XxHash::XxHash(const XxHash& other116) 
noexcept { + (void) other116; } -XxHash::XxHash(XxHash&& other113) noexcept { - (void) other113; +XxHash::XxHash(XxHash&& other117) noexcept { + (void) other117; } -XxHash& XxHash::operator=(const XxHash& other114) noexcept { - (void) other114; +XxHash& XxHash::operator=(const XxHash& other118) noexcept { + (void) other118; return *this; } -XxHash& XxHash::operator=(XxHash&& other115) noexcept { - (void) other115; +XxHash& XxHash::operator=(XxHash&& other119) noexcept { + (void) other119; return *this; } void XxHash::printTo(std::ostream& out) const { @@ -4150,22 +4249,22 @@ void swap(BloomFilterHash &a, BloomFilterHash &b) { swap(a.__isset, b.__isset); } -BloomFilterHash::BloomFilterHash(const BloomFilterHash& other116) noexcept { - XXHASH = other116.XXHASH; - __isset = other116.__isset; +BloomFilterHash::BloomFilterHash(const BloomFilterHash& other120) noexcept { + XXHASH = other120.XXHASH; + __isset = other120.__isset; } -BloomFilterHash::BloomFilterHash(BloomFilterHash&& other117) noexcept { - XXHASH = std::move(other117.XXHASH); - __isset = other117.__isset; +BloomFilterHash::BloomFilterHash(BloomFilterHash&& other121) noexcept { + XXHASH = std::move(other121.XXHASH); + __isset = other121.__isset; } -BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other118) noexcept { - XXHASH = other118.XXHASH; - __isset = other118.__isset; +BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other122) noexcept { + XXHASH = other122.XXHASH; + __isset = other122.__isset; return *this; } -BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other119) noexcept { - XXHASH = std::move(other119.XXHASH); - __isset = other119.__isset; +BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other123) noexcept { + XXHASH = std::move(other123.XXHASH); + __isset = other123.__isset; return *this; } void BloomFilterHash::printTo(std::ostream& out) const { @@ -4230,18 +4329,18 @@ void swap(Uncompressed &a, Uncompressed &b) { (void) b; } 
-Uncompressed::Uncompressed(const Uncompressed& other120) noexcept { - (void) other120; +Uncompressed::Uncompressed(const Uncompressed& other124) noexcept { + (void) other124; } -Uncompressed::Uncompressed(Uncompressed&& other121) noexcept { - (void) other121; +Uncompressed::Uncompressed(Uncompressed&& other125) noexcept { + (void) other125; } -Uncompressed& Uncompressed::operator=(const Uncompressed& other122) noexcept { - (void) other122; +Uncompressed& Uncompressed::operator=(const Uncompressed& other126) noexcept { + (void) other126; return *this; } -Uncompressed& Uncompressed::operator=(Uncompressed&& other123) noexcept { - (void) other123; +Uncompressed& Uncompressed::operator=(Uncompressed&& other127) noexcept { + (void) other127; return *this; } void Uncompressed::printTo(std::ostream& out) const { @@ -4328,22 +4427,22 @@ void swap(BloomFilterCompression &a, BloomFilterCompression &b) { swap(a.__isset, b.__isset); } -BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other124) noexcept { - UNCOMPRESSED = other124.UNCOMPRESSED; - __isset = other124.__isset; +BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other128) noexcept { + UNCOMPRESSED = other128.UNCOMPRESSED; + __isset = other128.__isset; } -BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other125) noexcept { - UNCOMPRESSED = std::move(other125.UNCOMPRESSED); - __isset = other125.__isset; +BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other129) noexcept { + UNCOMPRESSED = std::move(other129.UNCOMPRESSED); + __isset = other129.__isset; } -BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other126) noexcept { - UNCOMPRESSED = other126.UNCOMPRESSED; - __isset = other126.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other130) noexcept { + UNCOMPRESSED = other130.UNCOMPRESSED; + __isset = other130.__isset; return 
*this; } -BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other127) noexcept { - UNCOMPRESSED = std::move(other127.UNCOMPRESSED); - __isset = other127.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other131) noexcept { + UNCOMPRESSED = std::move(other131.UNCOMPRESSED); + __isset = other131.__isset; return *this; } void BloomFilterCompression::printTo(std::ostream& out) const { @@ -4491,30 +4590,30 @@ void swap(BloomFilterHeader &a, BloomFilterHeader &b) { swap(a.compression, b.compression); } -BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other128) noexcept { - numBytes = other128.numBytes; - algorithm = other128.algorithm; - hash = other128.hash; - compression = other128.compression; +BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other132) noexcept { + numBytes = other132.numBytes; + algorithm = other132.algorithm; + hash = other132.hash; + compression = other132.compression; } -BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other129) noexcept { - numBytes = other129.numBytes; - algorithm = std::move(other129.algorithm); - hash = std::move(other129.hash); - compression = std::move(other129.compression); +BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other133) noexcept { + numBytes = other133.numBytes; + algorithm = std::move(other133.algorithm); + hash = std::move(other133.hash); + compression = std::move(other133.compression); } -BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other130) noexcept { - numBytes = other130.numBytes; - algorithm = other130.algorithm; - hash = other130.hash; - compression = other130.compression; +BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other134) noexcept { + numBytes = other134.numBytes; + algorithm = other134.algorithm; + hash = other134.hash; + compression = other134.compression; return *this; } -BloomFilterHeader& 
BloomFilterHeader::operator=(BloomFilterHeader&& other131) noexcept { - numBytes = other131.numBytes; - algorithm = std::move(other131.algorithm); - hash = std::move(other131.hash); - compression = std::move(other131.compression); +BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other135) noexcept { + numBytes = other135.numBytes; + algorithm = std::move(other135.algorithm); + hash = std::move(other135.hash); + compression = std::move(other135.compression); return *this; } void BloomFilterHeader::printTo(std::ostream& out) const { @@ -4601,9 +4700,9 @@ uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast132; - xfer += iprot->readI32(ecast132); - this->type = static_cast(ecast132); + int32_t ecast136; + xfer += iprot->readI32(ecast136); + this->type = static_cast(ecast136); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -4743,50 +4842,50 @@ void swap(PageHeader &a, PageHeader &b) { swap(a.__isset, b.__isset); } -PageHeader::PageHeader(const PageHeader& other133) { - type = other133.type; - uncompressed_page_size = other133.uncompressed_page_size; - compressed_page_size = other133.compressed_page_size; - crc = other133.crc; - data_page_header = other133.data_page_header; - index_page_header = other133.index_page_header; - dictionary_page_header = other133.dictionary_page_header; - data_page_header_v2 = other133.data_page_header_v2; - __isset = other133.__isset; -} -PageHeader::PageHeader(PageHeader&& other134) noexcept { - type = other134.type; - uncompressed_page_size = other134.uncompressed_page_size; - compressed_page_size = other134.compressed_page_size; - crc = other134.crc; - data_page_header = std::move(other134.data_page_header); - index_page_header = std::move(other134.index_page_header); - dictionary_page_header = std::move(other134.dictionary_page_header); - data_page_header_v2 = std::move(other134.data_page_header_v2); - __isset 
= other134.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other135) { - type = other135.type; - uncompressed_page_size = other135.uncompressed_page_size; - compressed_page_size = other135.compressed_page_size; - crc = other135.crc; - data_page_header = other135.data_page_header; - index_page_header = other135.index_page_header; - dictionary_page_header = other135.dictionary_page_header; - data_page_header_v2 = other135.data_page_header_v2; - __isset = other135.__isset; +PageHeader::PageHeader(const PageHeader& other137) { + type = other137.type; + uncompressed_page_size = other137.uncompressed_page_size; + compressed_page_size = other137.compressed_page_size; + crc = other137.crc; + data_page_header = other137.data_page_header; + index_page_header = other137.index_page_header; + dictionary_page_header = other137.dictionary_page_header; + data_page_header_v2 = other137.data_page_header_v2; + __isset = other137.__isset; +} +PageHeader::PageHeader(PageHeader&& other138) noexcept { + type = other138.type; + uncompressed_page_size = other138.uncompressed_page_size; + compressed_page_size = other138.compressed_page_size; + crc = other138.crc; + data_page_header = std::move(other138.data_page_header); + index_page_header = std::move(other138.index_page_header); + dictionary_page_header = std::move(other138.dictionary_page_header); + data_page_header_v2 = std::move(other138.data_page_header_v2); + __isset = other138.__isset; +} +PageHeader& PageHeader::operator=(const PageHeader& other139) { + type = other139.type; + uncompressed_page_size = other139.uncompressed_page_size; + compressed_page_size = other139.compressed_page_size; + crc = other139.crc; + data_page_header = other139.data_page_header; + index_page_header = other139.index_page_header; + dictionary_page_header = other139.dictionary_page_header; + data_page_header_v2 = other139.data_page_header_v2; + __isset = other139.__isset; return *this; } -PageHeader& PageHeader::operator=(PageHeader&& 
other136) noexcept { - type = other136.type; - uncompressed_page_size = other136.uncompressed_page_size; - compressed_page_size = other136.compressed_page_size; - crc = other136.crc; - data_page_header = std::move(other136.data_page_header); - index_page_header = std::move(other136.index_page_header); - dictionary_page_header = std::move(other136.dictionary_page_header); - data_page_header_v2 = std::move(other136.data_page_header_v2); - __isset = other136.__isset; +PageHeader& PageHeader::operator=(PageHeader&& other140) noexcept { + type = other140.type; + uncompressed_page_size = other140.uncompressed_page_size; + compressed_page_size = other140.compressed_page_size; + crc = other140.crc; + data_page_header = std::move(other140.data_page_header); + index_page_header = std::move(other140.index_page_header); + dictionary_page_header = std::move(other140.dictionary_page_header); + data_page_header_v2 = std::move(other140.data_page_header_v2); + __isset = other140.__isset; return *this; } void PageHeader::printTo(std::ostream& out) const { @@ -4901,26 +5000,26 @@ void swap(KeyValue &a, KeyValue &b) { swap(a.__isset, b.__isset); } -KeyValue::KeyValue(const KeyValue& other137) { - key = other137.key; - value = other137.value; - __isset = other137.__isset; +KeyValue::KeyValue(const KeyValue& other141) { + key = other141.key; + value = other141.value; + __isset = other141.__isset; } -KeyValue::KeyValue(KeyValue&& other138) noexcept { - key = std::move(other138.key); - value = std::move(other138.value); - __isset = other138.__isset; +KeyValue::KeyValue(KeyValue&& other142) noexcept { + key = std::move(other142.key); + value = std::move(other142.value); + __isset = other142.__isset; } -KeyValue& KeyValue::operator=(const KeyValue& other139) { - key = other139.key; - value = other139.value; - __isset = other139.__isset; +KeyValue& KeyValue::operator=(const KeyValue& other143) { + key = other143.key; + value = other143.value; + __isset = other143.__isset; return *this; } 
-KeyValue& KeyValue::operator=(KeyValue&& other140) noexcept { - key = std::move(other140.key); - value = std::move(other140.value); - __isset = other140.__isset; +KeyValue& KeyValue::operator=(KeyValue&& other144) noexcept { + key = std::move(other144.key); + value = std::move(other144.value); + __isset = other144.__isset; return *this; } void KeyValue::printTo(std::ostream& out) const { @@ -5049,26 +5148,26 @@ void swap(SortingColumn &a, SortingColumn &b) { swap(a.nulls_first, b.nulls_first); } -SortingColumn::SortingColumn(const SortingColumn& other141) noexcept { - column_idx = other141.column_idx; - descending = other141.descending; - nulls_first = other141.nulls_first; +SortingColumn::SortingColumn(const SortingColumn& other145) noexcept { + column_idx = other145.column_idx; + descending = other145.descending; + nulls_first = other145.nulls_first; } -SortingColumn::SortingColumn(SortingColumn&& other142) noexcept { - column_idx = other142.column_idx; - descending = other142.descending; - nulls_first = other142.nulls_first; +SortingColumn::SortingColumn(SortingColumn&& other146) noexcept { + column_idx = other146.column_idx; + descending = other146.descending; + nulls_first = other146.nulls_first; } -SortingColumn& SortingColumn::operator=(const SortingColumn& other143) noexcept { - column_idx = other143.column_idx; - descending = other143.descending; - nulls_first = other143.nulls_first; +SortingColumn& SortingColumn::operator=(const SortingColumn& other147) noexcept { + column_idx = other147.column_idx; + descending = other147.descending; + nulls_first = other147.nulls_first; return *this; } -SortingColumn& SortingColumn::operator=(SortingColumn&& other144) noexcept { - column_idx = other144.column_idx; - descending = other144.descending; - nulls_first = other144.nulls_first; +SortingColumn& SortingColumn::operator=(SortingColumn&& other148) noexcept { + column_idx = other148.column_idx; + descending = other148.descending; + nulls_first = 
other148.nulls_first; return *this; } void SortingColumn::printTo(std::ostream& out) const { @@ -5129,9 +5228,9 @@ uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast145; - xfer += iprot->readI32(ecast145); - this->page_type = static_cast(ecast145); + int32_t ecast149; + xfer += iprot->readI32(ecast149); + this->page_type = static_cast(ecast149); isset_page_type = true; } else { xfer += iprot->skip(ftype); @@ -5139,9 +5238,9 @@ uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast146; - xfer += iprot->readI32(ecast146); - this->encoding = static_cast(ecast146); + int32_t ecast150; + xfer += iprot->readI32(ecast150); + this->encoding = static_cast(ecast150); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -5202,26 +5301,26 @@ void swap(PageEncodingStats &a, PageEncodingStats &b) { swap(a.count, b.count); } -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other147) noexcept { - page_type = other147.page_type; - encoding = other147.encoding; - count = other147.count; +PageEncodingStats::PageEncodingStats(const PageEncodingStats& other151) noexcept { + page_type = other151.page_type; + encoding = other151.encoding; + count = other151.count; } -PageEncodingStats::PageEncodingStats(PageEncodingStats&& other148) noexcept { - page_type = other148.page_type; - encoding = other148.encoding; - count = other148.count; +PageEncodingStats::PageEncodingStats(PageEncodingStats&& other152) noexcept { + page_type = other152.page_type; + encoding = other152.encoding; + count = other152.count; } -PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other149) noexcept { - page_type = other149.page_type; - encoding = other149.encoding; - count = other149.count; +PageEncodingStats& PageEncodingStats::operator=(const 
PageEncodingStats& other153) noexcept { + page_type = other153.page_type; + encoding = other153.encoding; + count = other153.count; return *this; } -PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other150) noexcept { - page_type = other150.page_type; - encoding = other150.encoding; - count = other150.count; +PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other154) noexcept { + page_type = other154.page_type; + encoding = other154.encoding; + count = other154.count; return *this; } void PageEncodingStats::printTo(std::ostream& out) const { @@ -5337,9 +5436,9 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast151; - xfer += iprot->readI32(ecast151); - this->type = static_cast(ecast151); + int32_t ecast155; + xfer += iprot->readI32(ecast155); + this->type = static_cast(ecast155); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -5349,16 +5448,16 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encodings.clear(); - uint32_t _size152; - ::apache::thrift::protocol::TType _etype155; - xfer += iprot->readListBegin(_etype155, _size152); - this->encodings.resize(_size152); - uint32_t _i156; - for (_i156 = 0; _i156 < _size152; ++_i156) + uint32_t _size156; + ::apache::thrift::protocol::TType _etype159; + xfer += iprot->readListBegin(_etype159, _size156); + this->encodings.resize(_size156); + uint32_t _i160; + for (_i160 = 0; _i160 < _size156; ++_i160) { - int32_t ecast157; - xfer += iprot->readI32(ecast157); - this->encodings[_i156] = static_cast(ecast157); + int32_t ecast161; + xfer += iprot->readI32(ecast161); + this->encodings[_i160] = static_cast(ecast161); } xfer += iprot->readListEnd(); } @@ -5371,14 +5470,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == 
::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size158; - ::apache::thrift::protocol::TType _etype161; - xfer += iprot->readListBegin(_etype161, _size158); - this->path_in_schema.resize(_size158); - uint32_t _i162; - for (_i162 = 0; _i162 < _size158; ++_i162) + uint32_t _size162; + ::apache::thrift::protocol::TType _etype165; + xfer += iprot->readListBegin(_etype165, _size162); + this->path_in_schema.resize(_size162); + uint32_t _i166; + for (_i166 = 0; _i166 < _size162; ++_i166) { - xfer += iprot->readString(this->path_in_schema[_i162]); + xfer += iprot->readString(this->path_in_schema[_i166]); } xfer += iprot->readListEnd(); } @@ -5389,9 +5488,9 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast163; - xfer += iprot->readI32(ecast163); - this->codec = static_cast(ecast163); + int32_t ecast167; + xfer += iprot->readI32(ecast167); + this->codec = static_cast(ecast167); isset_codec = true; } else { xfer += iprot->skip(ftype); @@ -5425,14 +5524,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size164; - ::apache::thrift::protocol::TType _etype167; - xfer += iprot->readListBegin(_etype167, _size164); - this->key_value_metadata.resize(_size164); - uint32_t _i168; - for (_i168 = 0; _i168 < _size164; ++_i168) + uint32_t _size168; + ::apache::thrift::protocol::TType _etype171; + xfer += iprot->readListBegin(_etype171, _size168); + this->key_value_metadata.resize(_size168); + uint32_t _i172; + for (_i172 = 0; _i172 < _size168; ++_i172) { - xfer += this->key_value_metadata[_i168].read(iprot); + xfer += this->key_value_metadata[_i172].read(iprot); } xfer += iprot->readListEnd(); } @@ -5477,14 +5576,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == 
::apache::thrift::protocol::T_LIST) { { this->encoding_stats.clear(); - uint32_t _size169; - ::apache::thrift::protocol::TType _etype172; - xfer += iprot->readListBegin(_etype172, _size169); - this->encoding_stats.resize(_size169); - uint32_t _i173; - for (_i173 = 0; _i173 < _size169; ++_i173) + uint32_t _size173; + ::apache::thrift::protocol::TType _etype176; + xfer += iprot->readListBegin(_etype176, _size173); + this->encoding_stats.resize(_size173); + uint32_t _i177; + for (_i177 = 0; _i177 < _size173; ++_i177) { - xfer += this->encoding_stats[_i173].read(iprot); + xfer += this->encoding_stats[_i177].read(iprot); } xfer += iprot->readListEnd(); } @@ -5541,10 +5640,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter174; - for (_iter174 = this->encodings.begin(); _iter174 != this->encodings.end(); ++_iter174) + std::vector ::const_iterator _iter178; + for (_iter178 = this->encodings.begin(); _iter178 != this->encodings.end(); ++_iter178) { - xfer += oprot->writeI32(static_cast((*_iter174))); + xfer += oprot->writeI32(static_cast((*_iter178))); } xfer += oprot->writeListEnd(); } @@ -5553,10 +5652,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter175; - for (_iter175 = this->path_in_schema.begin(); _iter175 != this->path_in_schema.end(); ++_iter175) + std::vector ::const_iterator _iter179; + for (_iter179 = this->path_in_schema.begin(); _iter179 != this->path_in_schema.end(); ++_iter179) { - xfer += oprot->writeString((*_iter175)); + xfer 
+= oprot->writeString((*_iter179)); } xfer += oprot->writeListEnd(); } @@ -5582,10 +5681,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter176; - for (_iter176 = this->key_value_metadata.begin(); _iter176 != this->key_value_metadata.end(); ++_iter176) + std::vector ::const_iterator _iter180; + for (_iter180 = this->key_value_metadata.begin(); _iter180 != this->key_value_metadata.end(); ++_iter180) { - xfer += (*_iter176).write(oprot); + xfer += (*_iter180).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5614,10 +5713,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter177; - for (_iter177 = this->encoding_stats.begin(); _iter177 != this->encoding_stats.end(); ++_iter177) + std::vector ::const_iterator _iter181; + for (_iter181 = this->encoding_stats.begin(); _iter181 != this->encoding_stats.end(); ++_iter181) { - xfer += (*_iter177).write(oprot); + xfer += (*_iter181).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5652,74 +5751,74 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) { swap(a.__isset, b.__isset); } -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other178) { - type = other178.type; - encodings = other178.encodings; - path_in_schema = other178.path_in_schema; - codec = other178.codec; - num_values = other178.num_values; - total_uncompressed_size = other178.total_uncompressed_size; - total_compressed_size = other178.total_compressed_size; - key_value_metadata = other178.key_value_metadata; - 
data_page_offset = other178.data_page_offset; - index_page_offset = other178.index_page_offset; - dictionary_page_offset = other178.dictionary_page_offset; - statistics = other178.statistics; - encoding_stats = other178.encoding_stats; - bloom_filter_offset = other178.bloom_filter_offset; - __isset = other178.__isset; -} -ColumnMetaData::ColumnMetaData(ColumnMetaData&& other179) noexcept { - type = other179.type; - encodings = std::move(other179.encodings); - path_in_schema = std::move(other179.path_in_schema); - codec = other179.codec; - num_values = other179.num_values; - total_uncompressed_size = other179.total_uncompressed_size; - total_compressed_size = other179.total_compressed_size; - key_value_metadata = std::move(other179.key_value_metadata); - data_page_offset = other179.data_page_offset; - index_page_offset = other179.index_page_offset; - dictionary_page_offset = other179.dictionary_page_offset; - statistics = std::move(other179.statistics); - encoding_stats = std::move(other179.encoding_stats); - bloom_filter_offset = other179.bloom_filter_offset; - __isset = other179.__isset; -} -ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other180) { - type = other180.type; - encodings = other180.encodings; - path_in_schema = other180.path_in_schema; - codec = other180.codec; - num_values = other180.num_values; - total_uncompressed_size = other180.total_uncompressed_size; - total_compressed_size = other180.total_compressed_size; - key_value_metadata = other180.key_value_metadata; - data_page_offset = other180.data_page_offset; - index_page_offset = other180.index_page_offset; - dictionary_page_offset = other180.dictionary_page_offset; - statistics = other180.statistics; - encoding_stats = other180.encoding_stats; - bloom_filter_offset = other180.bloom_filter_offset; - __isset = other180.__isset; +ColumnMetaData::ColumnMetaData(const ColumnMetaData& other182) { + type = other182.type; + encodings = other182.encodings; + path_in_schema = 
other182.path_in_schema; + codec = other182.codec; + num_values = other182.num_values; + total_uncompressed_size = other182.total_uncompressed_size; + total_compressed_size = other182.total_compressed_size; + key_value_metadata = other182.key_value_metadata; + data_page_offset = other182.data_page_offset; + index_page_offset = other182.index_page_offset; + dictionary_page_offset = other182.dictionary_page_offset; + statistics = other182.statistics; + encoding_stats = other182.encoding_stats; + bloom_filter_offset = other182.bloom_filter_offset; + __isset = other182.__isset; +} +ColumnMetaData::ColumnMetaData(ColumnMetaData&& other183) noexcept { + type = other183.type; + encodings = std::move(other183.encodings); + path_in_schema = std::move(other183.path_in_schema); + codec = other183.codec; + num_values = other183.num_values; + total_uncompressed_size = other183.total_uncompressed_size; + total_compressed_size = other183.total_compressed_size; + key_value_metadata = std::move(other183.key_value_metadata); + data_page_offset = other183.data_page_offset; + index_page_offset = other183.index_page_offset; + dictionary_page_offset = other183.dictionary_page_offset; + statistics = std::move(other183.statistics); + encoding_stats = std::move(other183.encoding_stats); + bloom_filter_offset = other183.bloom_filter_offset; + __isset = other183.__isset; +} +ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other184) { + type = other184.type; + encodings = other184.encodings; + path_in_schema = other184.path_in_schema; + codec = other184.codec; + num_values = other184.num_values; + total_uncompressed_size = other184.total_uncompressed_size; + total_compressed_size = other184.total_compressed_size; + key_value_metadata = other184.key_value_metadata; + data_page_offset = other184.data_page_offset; + index_page_offset = other184.index_page_offset; + dictionary_page_offset = other184.dictionary_page_offset; + statistics = other184.statistics; + encoding_stats = 
other184.encoding_stats; + bloom_filter_offset = other184.bloom_filter_offset; + __isset = other184.__isset; return *this; } -ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other181) noexcept { - type = other181.type; - encodings = std::move(other181.encodings); - path_in_schema = std::move(other181.path_in_schema); - codec = other181.codec; - num_values = other181.num_values; - total_uncompressed_size = other181.total_uncompressed_size; - total_compressed_size = other181.total_compressed_size; - key_value_metadata = std::move(other181.key_value_metadata); - data_page_offset = other181.data_page_offset; - index_page_offset = other181.index_page_offset; - dictionary_page_offset = other181.dictionary_page_offset; - statistics = std::move(other181.statistics); - encoding_stats = std::move(other181.encoding_stats); - bloom_filter_offset = other181.bloom_filter_offset; - __isset = other181.__isset; +ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other185) noexcept { + type = other185.type; + encodings = std::move(other185.encodings); + path_in_schema = std::move(other185.path_in_schema); + codec = other185.codec; + num_values = other185.num_values; + total_uncompressed_size = other185.total_uncompressed_size; + total_compressed_size = other185.total_compressed_size; + key_value_metadata = std::move(other185.key_value_metadata); + data_page_offset = other185.data_page_offset; + index_page_offset = other185.index_page_offset; + dictionary_page_offset = other185.dictionary_page_offset; + statistics = std::move(other185.statistics); + encoding_stats = std::move(other185.encoding_stats); + bloom_filter_offset = other185.bloom_filter_offset; + __isset = other185.__isset; return *this; } void ColumnMetaData::printTo(std::ostream& out) const { @@ -5797,18 +5896,18 @@ void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { (void) b; } -EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other182) noexcept { - (void) 
other182; +EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other186) noexcept { + (void) other186; } -EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other183) noexcept { - (void) other183; +EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other187) noexcept { + (void) other187; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other184) noexcept { - (void) other184; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other188) noexcept { + (void) other188; return *this; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other185) noexcept { - (void) other185; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other189) noexcept { + (void) other189; return *this; } void EncryptionWithFooterKey::printTo(std::ostream& out) const { @@ -5863,14 +5962,14 @@ uint32_t EncryptionWithColumnKey::read(::apache::thrift::protocol::TProtocol* ip if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size186; - ::apache::thrift::protocol::TType _etype189; - xfer += iprot->readListBegin(_etype189, _size186); - this->path_in_schema.resize(_size186); - uint32_t _i190; - for (_i190 = 0; _i190 < _size186; ++_i190) + uint32_t _size190; + ::apache::thrift::protocol::TType _etype193; + xfer += iprot->readListBegin(_etype193, _size190); + this->path_in_schema.resize(_size190); + uint32_t _i194; + for (_i194 = 0; _i194 < _size190; ++_i194) { - xfer += iprot->readString(this->path_in_schema[_i190]); + xfer += iprot->readString(this->path_in_schema[_i194]); } xfer += iprot->readListEnd(); } @@ -5909,10 +6008,10 @@ uint32_t EncryptionWithColumnKey::write(::apache::thrift::protocol::TProtocol* o xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter191; - for (_iter191 = this->path_in_schema.begin(); _iter191 != this->path_in_schema.end(); ++_iter191) + std::vector ::const_iterator _iter195; + for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195) { - xfer += oprot->writeString((*_iter191)); + xfer += oprot->writeString((*_iter195)); } xfer += oprot->writeListEnd(); } @@ -5935,26 +6034,26 @@ void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { swap(a.__isset, b.__isset); } -EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other192) { - path_in_schema = other192.path_in_schema; - key_metadata = other192.key_metadata; - __isset = other192.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other196) { + path_in_schema = other196.path_in_schema; + key_metadata = other196.key_metadata; + __isset = other196.__isset; } -EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other193) noexcept { - path_in_schema = std::move(other193.path_in_schema); - key_metadata = std::move(other193.key_metadata); - __isset = other193.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other197) noexcept { + path_in_schema = std::move(other197.path_in_schema); + key_metadata = std::move(other197.key_metadata); + __isset = other197.__isset; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other194) { - path_in_schema = other194.path_in_schema; - key_metadata = other194.key_metadata; - __isset = other194.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other198) { + path_in_schema = other198.path_in_schema; + key_metadata = other198.key_metadata; + __isset = other198.__isset; return *this; } -EncryptionWithColumnKey& 
EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other195) noexcept { - path_in_schema = std::move(other195.path_in_schema); - key_metadata = std::move(other195.key_metadata); - __isset = other195.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other199) noexcept { + path_in_schema = std::move(other199.path_in_schema); + key_metadata = std::move(other199.key_metadata); + __isset = other199.__isset; return *this; } void EncryptionWithColumnKey::printTo(std::ostream& out) const { @@ -6062,26 +6161,26 @@ void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { swap(a.__isset, b.__isset); } -ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other196) { - ENCRYPTION_WITH_FOOTER_KEY = other196.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other196.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other196.__isset; +ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other200) { + ENCRYPTION_WITH_FOOTER_KEY = other200.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other200.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other200.__isset; } -ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other197) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other197.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other197.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other197.__isset; +ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other201) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other201.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other201.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other201.__isset; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other198) { - ENCRYPTION_WITH_FOOTER_KEY = other198.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other198.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other198.__isset; +ColumnCryptoMetaData& 
ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other202) { + ENCRYPTION_WITH_FOOTER_KEY = other202.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other202.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other202.__isset; return *this; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other199) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other199.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other199.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other199.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other203) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other203.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other203.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other203.__isset; return *this; } void ColumnCryptoMetaData::printTo(std::ostream& out) const { @@ -6323,54 +6422,54 @@ void swap(ColumnChunk &a, ColumnChunk &b) { swap(a.__isset, b.__isset); } -ColumnChunk::ColumnChunk(const ColumnChunk& other200) { - file_path = other200.file_path; - file_offset = other200.file_offset; - meta_data = other200.meta_data; - offset_index_offset = other200.offset_index_offset; - offset_index_length = other200.offset_index_length; - column_index_offset = other200.column_index_offset; - column_index_length = other200.column_index_length; - crypto_metadata = other200.crypto_metadata; - encrypted_column_metadata = other200.encrypted_column_metadata; - __isset = other200.__isset; -} -ColumnChunk::ColumnChunk(ColumnChunk&& other201) noexcept { - file_path = std::move(other201.file_path); - file_offset = other201.file_offset; - meta_data = std::move(other201.meta_data); - offset_index_offset = other201.offset_index_offset; - offset_index_length = other201.offset_index_length; - column_index_offset = other201.column_index_offset; - column_index_length = other201.column_index_length; - crypto_metadata = std::move(other201.crypto_metadata); - 
encrypted_column_metadata = std::move(other201.encrypted_column_metadata); - __isset = other201.__isset; -} -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other202) { - file_path = other202.file_path; - file_offset = other202.file_offset; - meta_data = other202.meta_data; - offset_index_offset = other202.offset_index_offset; - offset_index_length = other202.offset_index_length; - column_index_offset = other202.column_index_offset; - column_index_length = other202.column_index_length; - crypto_metadata = other202.crypto_metadata; - encrypted_column_metadata = other202.encrypted_column_metadata; - __isset = other202.__isset; +ColumnChunk::ColumnChunk(const ColumnChunk& other204) { + file_path = other204.file_path; + file_offset = other204.file_offset; + meta_data = other204.meta_data; + offset_index_offset = other204.offset_index_offset; + offset_index_length = other204.offset_index_length; + column_index_offset = other204.column_index_offset; + column_index_length = other204.column_index_length; + crypto_metadata = other204.crypto_metadata; + encrypted_column_metadata = other204.encrypted_column_metadata; + __isset = other204.__isset; +} +ColumnChunk::ColumnChunk(ColumnChunk&& other205) noexcept { + file_path = std::move(other205.file_path); + file_offset = other205.file_offset; + meta_data = std::move(other205.meta_data); + offset_index_offset = other205.offset_index_offset; + offset_index_length = other205.offset_index_length; + column_index_offset = other205.column_index_offset; + column_index_length = other205.column_index_length; + crypto_metadata = std::move(other205.crypto_metadata); + encrypted_column_metadata = std::move(other205.encrypted_column_metadata); + __isset = other205.__isset; +} +ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other206) { + file_path = other206.file_path; + file_offset = other206.file_offset; + meta_data = other206.meta_data; + offset_index_offset = other206.offset_index_offset; + offset_index_length = 
other206.offset_index_length; + column_index_offset = other206.column_index_offset; + column_index_length = other206.column_index_length; + crypto_metadata = other206.crypto_metadata; + encrypted_column_metadata = other206.encrypted_column_metadata; + __isset = other206.__isset; return *this; } -ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other203) noexcept { - file_path = std::move(other203.file_path); - file_offset = other203.file_offset; - meta_data = std::move(other203.meta_data); - offset_index_offset = other203.offset_index_offset; - offset_index_length = other203.offset_index_length; - column_index_offset = other203.column_index_offset; - column_index_length = other203.column_index_length; - crypto_metadata = std::move(other203.crypto_metadata); - encrypted_column_metadata = std::move(other203.encrypted_column_metadata); - __isset = other203.__isset; +ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other207) noexcept { + file_path = std::move(other207.file_path); + file_offset = other207.file_offset; + meta_data = std::move(other207.meta_data); + offset_index_offset = other207.offset_index_offset; + offset_index_length = other207.offset_index_length; + column_index_offset = other207.column_index_offset; + column_index_length = other207.column_index_length; + crypto_metadata = std::move(other207.crypto_metadata); + encrypted_column_metadata = std::move(other207.encrypted_column_metadata); + __isset = other207.__isset; return *this; } void ColumnChunk::printTo(std::ostream& out) const { @@ -6459,14 +6558,14 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->columns.clear(); - uint32_t _size204; - ::apache::thrift::protocol::TType _etype207; - xfer += iprot->readListBegin(_etype207, _size204); - this->columns.resize(_size204); - uint32_t _i208; - for (_i208 = 0; _i208 < _size204; ++_i208) + uint32_t _size208; + ::apache::thrift::protocol::TType _etype211; + xfer += 
iprot->readListBegin(_etype211, _size208); + this->columns.resize(_size208); + uint32_t _i212; + for (_i212 = 0; _i212 < _size208; ++_i212) { - xfer += this->columns[_i208].read(iprot); + xfer += this->columns[_i212].read(iprot); } xfer += iprot->readListEnd(); } @@ -6495,14 +6594,14 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->sorting_columns.clear(); - uint32_t _size209; - ::apache::thrift::protocol::TType _etype212; - xfer += iprot->readListBegin(_etype212, _size209); - this->sorting_columns.resize(_size209); - uint32_t _i213; - for (_i213 = 0; _i213 < _size209; ++_i213) + uint32_t _size213; + ::apache::thrift::protocol::TType _etype216; + xfer += iprot->readListBegin(_etype216, _size213); + this->sorting_columns.resize(_size213); + uint32_t _i217; + for (_i217 = 0; _i217 < _size213; ++_i217) { - xfer += this->sorting_columns[_i213].read(iprot); + xfer += this->sorting_columns[_i217].read(iprot); } xfer += iprot->readListEnd(); } @@ -6561,10 +6660,10 @@ uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const { xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter214; - for (_iter214 = this->columns.begin(); _iter214 != this->columns.end(); ++_iter214) + std::vector ::const_iterator _iter218; + for (_iter218 = this->columns.begin(); _iter218 != this->columns.end(); ++_iter218) { - xfer += (*_iter214).write(oprot); + xfer += (*_iter218).write(oprot); } xfer += oprot->writeListEnd(); } @@ -6582,10 +6681,10 @@ uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const { xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - 
std::vector ::const_iterator _iter215; - for (_iter215 = this->sorting_columns.begin(); _iter215 != this->sorting_columns.end(); ++_iter215) + std::vector ::const_iterator _iter219; + for (_iter219 = this->sorting_columns.begin(); _iter219 != this->sorting_columns.end(); ++_iter219) { - xfer += (*_iter215).write(oprot); + xfer += (*_iter219).write(oprot); } xfer += oprot->writeListEnd(); } @@ -6623,46 +6722,46 @@ void swap(RowGroup &a, RowGroup &b) { swap(a.__isset, b.__isset); } -RowGroup::RowGroup(const RowGroup& other216) { - columns = other216.columns; - total_byte_size = other216.total_byte_size; - num_rows = other216.num_rows; - sorting_columns = other216.sorting_columns; - file_offset = other216.file_offset; - total_compressed_size = other216.total_compressed_size; - ordinal = other216.ordinal; - __isset = other216.__isset; -} -RowGroup::RowGroup(RowGroup&& other217) noexcept { - columns = std::move(other217.columns); - total_byte_size = other217.total_byte_size; - num_rows = other217.num_rows; - sorting_columns = std::move(other217.sorting_columns); - file_offset = other217.file_offset; - total_compressed_size = other217.total_compressed_size; - ordinal = other217.ordinal; - __isset = other217.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other218) { - columns = other218.columns; - total_byte_size = other218.total_byte_size; - num_rows = other218.num_rows; - sorting_columns = other218.sorting_columns; - file_offset = other218.file_offset; - total_compressed_size = other218.total_compressed_size; - ordinal = other218.ordinal; - __isset = other218.__isset; +RowGroup::RowGroup(const RowGroup& other220) { + columns = other220.columns; + total_byte_size = other220.total_byte_size; + num_rows = other220.num_rows; + sorting_columns = other220.sorting_columns; + file_offset = other220.file_offset; + total_compressed_size = other220.total_compressed_size; + ordinal = other220.ordinal; + __isset = other220.__isset; +} +RowGroup::RowGroup(RowGroup&& 
other221) noexcept { + columns = std::move(other221.columns); + total_byte_size = other221.total_byte_size; + num_rows = other221.num_rows; + sorting_columns = std::move(other221.sorting_columns); + file_offset = other221.file_offset; + total_compressed_size = other221.total_compressed_size; + ordinal = other221.ordinal; + __isset = other221.__isset; +} +RowGroup& RowGroup::operator=(const RowGroup& other222) { + columns = other222.columns; + total_byte_size = other222.total_byte_size; + num_rows = other222.num_rows; + sorting_columns = other222.sorting_columns; + file_offset = other222.file_offset; + total_compressed_size = other222.total_compressed_size; + ordinal = other222.ordinal; + __isset = other222.__isset; return *this; } -RowGroup& RowGroup::operator=(RowGroup&& other219) noexcept { - columns = std::move(other219.columns); - total_byte_size = other219.total_byte_size; - num_rows = other219.num_rows; - sorting_columns = std::move(other219.sorting_columns); - file_offset = other219.file_offset; - total_compressed_size = other219.total_compressed_size; - ordinal = other219.ordinal; - __isset = other219.__isset; +RowGroup& RowGroup::operator=(RowGroup&& other223) noexcept { + columns = std::move(other223.columns); + total_byte_size = other223.total_byte_size; + num_rows = other223.num_rows; + sorting_columns = std::move(other223.sorting_columns); + file_offset = other223.file_offset; + total_compressed_size = other223.total_compressed_size; + ordinal = other223.ordinal; + __isset = other223.__isset; return *this; } void RowGroup::printTo(std::ostream& out) const { @@ -6733,18 +6832,18 @@ void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { (void) b; } -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other220) noexcept { - (void) other220; +TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other224) noexcept { + (void) other224; } -TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other221) noexcept { - (void) other221; 
+TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other225) noexcept { + (void) other225; } -TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other222) noexcept { - (void) other222; +TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other226) noexcept { + (void) other226; return *this; } -TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other223) noexcept { - (void) other223; +TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other227) noexcept { + (void) other227; return *this; } void TypeDefinedOrder::printTo(std::ostream& out) const { @@ -6831,22 +6930,22 @@ void swap(ColumnOrder &a, ColumnOrder &b) { swap(a.__isset, b.__isset); } -ColumnOrder::ColumnOrder(const ColumnOrder& other224) noexcept { - TYPE_ORDER = other224.TYPE_ORDER; - __isset = other224.__isset; +ColumnOrder::ColumnOrder(const ColumnOrder& other228) noexcept { + TYPE_ORDER = other228.TYPE_ORDER; + __isset = other228.__isset; } -ColumnOrder::ColumnOrder(ColumnOrder&& other225) noexcept { - TYPE_ORDER = std::move(other225.TYPE_ORDER); - __isset = other225.__isset; +ColumnOrder::ColumnOrder(ColumnOrder&& other229) noexcept { + TYPE_ORDER = std::move(other229.TYPE_ORDER); + __isset = other229.__isset; } -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other226) noexcept { - TYPE_ORDER = other226.TYPE_ORDER; - __isset = other226.__isset; +ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other230) noexcept { + TYPE_ORDER = other230.TYPE_ORDER; + __isset = other230.__isset; return *this; } -ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other227) noexcept { - TYPE_ORDER = std::move(other227.TYPE_ORDER); - __isset = other227.__isset; +ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other231) noexcept { + TYPE_ORDER = std::move(other231.TYPE_ORDER); + __isset = other231.__isset; return *this; } void ColumnOrder::printTo(std::ostream& out) const { @@ -6974,26 +7073,26 @@ void swap(PageLocation &a, PageLocation &b) 
{ swap(a.first_row_index, b.first_row_index); } -PageLocation::PageLocation(const PageLocation& other228) noexcept { - offset = other228.offset; - compressed_page_size = other228.compressed_page_size; - first_row_index = other228.first_row_index; +PageLocation::PageLocation(const PageLocation& other232) noexcept { + offset = other232.offset; + compressed_page_size = other232.compressed_page_size; + first_row_index = other232.first_row_index; } -PageLocation::PageLocation(PageLocation&& other229) noexcept { - offset = other229.offset; - compressed_page_size = other229.compressed_page_size; - first_row_index = other229.first_row_index; +PageLocation::PageLocation(PageLocation&& other233) noexcept { + offset = other233.offset; + compressed_page_size = other233.compressed_page_size; + first_row_index = other233.first_row_index; } -PageLocation& PageLocation::operator=(const PageLocation& other230) noexcept { - offset = other230.offset; - compressed_page_size = other230.compressed_page_size; - first_row_index = other230.first_row_index; +PageLocation& PageLocation::operator=(const PageLocation& other234) noexcept { + offset = other234.offset; + compressed_page_size = other234.compressed_page_size; + first_row_index = other234.first_row_index; return *this; } -PageLocation& PageLocation::operator=(PageLocation&& other231) noexcept { - offset = other231.offset; - compressed_page_size = other231.compressed_page_size; - first_row_index = other231.first_row_index; +PageLocation& PageLocation::operator=(PageLocation&& other235) noexcept { + offset = other235.offset; + compressed_page_size = other235.compressed_page_size; + first_row_index = other235.first_row_index; return *this; } void PageLocation::printTo(std::ostream& out) const { @@ -7046,14 +7145,14 @@ uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->page_locations.clear(); - uint32_t _size232; - ::apache::thrift::protocol::TType 
_etype235; - xfer += iprot->readListBegin(_etype235, _size232); - this->page_locations.resize(_size232); - uint32_t _i236; - for (_i236 = 0; _i236 < _size232; ++_i236) + uint32_t _size236; + ::apache::thrift::protocol::TType _etype239; + xfer += iprot->readListBegin(_etype239, _size236); + this->page_locations.resize(_size236); + uint32_t _i240; + for (_i240 = 0; _i240 < _size236; ++_i240) { - xfer += this->page_locations[_i236].read(iprot); + xfer += this->page_locations[_i240].read(iprot); } xfer += iprot->readListEnd(); } @@ -7084,10 +7183,10 @@ uint32_t OffsetIndex::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter237; - for (_iter237 = this->page_locations.begin(); _iter237 != this->page_locations.end(); ++_iter237) + std::vector ::const_iterator _iter241; + for (_iter241 = this->page_locations.begin(); _iter241 != this->page_locations.end(); ++_iter241) { - xfer += (*_iter237).write(oprot); + xfer += (*_iter241).write(oprot); } xfer += oprot->writeListEnd(); } @@ -7103,18 +7202,18 @@ void swap(OffsetIndex &a, OffsetIndex &b) { swap(a.page_locations, b.page_locations); } -OffsetIndex::OffsetIndex(const OffsetIndex& other238) { - page_locations = other238.page_locations; +OffsetIndex::OffsetIndex(const OffsetIndex& other242) { + page_locations = other242.page_locations; } -OffsetIndex::OffsetIndex(OffsetIndex&& other239) noexcept { - page_locations = std::move(other239.page_locations); +OffsetIndex::OffsetIndex(OffsetIndex&& other243) noexcept { + page_locations = std::move(other243.page_locations); } -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other240) { - page_locations = other240.page_locations; +OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other244) { + page_locations = 
other244.page_locations; return *this; } -OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other241) noexcept { - page_locations = std::move(other241.page_locations); +OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other245) noexcept { + page_locations = std::move(other245.page_locations); return *this; } void OffsetIndex::printTo(std::ostream& out) const { @@ -7185,14 +7284,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_pages.clear(); - uint32_t _size242; - ::apache::thrift::protocol::TType _etype245; - xfer += iprot->readListBegin(_etype245, _size242); - this->null_pages.resize(_size242); - uint32_t _i246; - for (_i246 = 0; _i246 < _size242; ++_i246) + uint32_t _size246; + ::apache::thrift::protocol::TType _etype249; + xfer += iprot->readListBegin(_etype249, _size246); + this->null_pages.resize(_size246); + uint32_t _i250; + for (_i250 = 0; _i250 < _size246; ++_i250) { - xfer += iprot->readBool(this->null_pages[_i246]); + xfer += iprot->readBool(this->null_pages[_i250]); } xfer += iprot->readListEnd(); } @@ -7205,14 +7304,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->min_values.clear(); - uint32_t _size247; - ::apache::thrift::protocol::TType _etype250; - xfer += iprot->readListBegin(_etype250, _size247); - this->min_values.resize(_size247); - uint32_t _i251; - for (_i251 = 0; _i251 < _size247; ++_i251) + uint32_t _size251; + ::apache::thrift::protocol::TType _etype254; + xfer += iprot->readListBegin(_etype254, _size251); + this->min_values.resize(_size251); + uint32_t _i255; + for (_i255 = 0; _i255 < _size251; ++_i255) { - xfer += iprot->readBinary(this->min_values[_i251]); + xfer += iprot->readBinary(this->min_values[_i255]); } xfer += iprot->readListEnd(); } @@ -7225,14 +7324,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == 
::apache::thrift::protocol::T_LIST) { { this->max_values.clear(); - uint32_t _size252; - ::apache::thrift::protocol::TType _etype255; - xfer += iprot->readListBegin(_etype255, _size252); - this->max_values.resize(_size252); - uint32_t _i256; - for (_i256 = 0; _i256 < _size252; ++_i256) + uint32_t _size256; + ::apache::thrift::protocol::TType _etype259; + xfer += iprot->readListBegin(_etype259, _size256); + this->max_values.resize(_size256); + uint32_t _i260; + for (_i260 = 0; _i260 < _size256; ++_i260) { - xfer += iprot->readBinary(this->max_values[_i256]); + xfer += iprot->readBinary(this->max_values[_i260]); } xfer += iprot->readListEnd(); } @@ -7243,9 +7342,9 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast257; - xfer += iprot->readI32(ecast257); - this->boundary_order = static_cast(ecast257); + int32_t ecast261; + xfer += iprot->readI32(ecast261); + this->boundary_order = static_cast(ecast261); isset_boundary_order = true; } else { xfer += iprot->skip(ftype); @@ -7255,14 +7354,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_counts.clear(); - uint32_t _size258; - ::apache::thrift::protocol::TType _etype261; - xfer += iprot->readListBegin(_etype261, _size258); - this->null_counts.resize(_size258); - uint32_t _i262; - for (_i262 = 0; _i262 < _size258; ++_i262) + uint32_t _size262; + ::apache::thrift::protocol::TType _etype265; + xfer += iprot->readListBegin(_etype265, _size262); + this->null_counts.resize(_size262); + uint32_t _i266; + for (_i266 = 0; _i266 < _size262; ++_i266) { - xfer += iprot->readI64(this->null_counts[_i262]); + xfer += iprot->readI64(this->null_counts[_i266]); } xfer += iprot->readListEnd(); } @@ -7299,10 +7398,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += oprot->writeFieldBegin("null_pages", 
::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter263; - for (_iter263 = this->null_pages.begin(); _iter263 != this->null_pages.end(); ++_iter263) + std::vector ::const_iterator _iter267; + for (_iter267 = this->null_pages.begin(); _iter267 != this->null_pages.end(); ++_iter267) { - xfer += oprot->writeBool((*_iter263)); + xfer += oprot->writeBool((*_iter267)); } xfer += oprot->writeListEnd(); } @@ -7311,10 +7410,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter264; - for (_iter264 = this->min_values.begin(); _iter264 != this->min_values.end(); ++_iter264) + std::vector ::const_iterator _iter268; + for (_iter268 = this->min_values.begin(); _iter268 != this->min_values.end(); ++_iter268) { - xfer += oprot->writeBinary((*_iter264)); + xfer += oprot->writeBinary((*_iter268)); } xfer += oprot->writeListEnd(); } @@ -7323,10 +7422,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter265; - for (_iter265 = this->max_values.begin(); _iter265 != this->max_values.end(); ++_iter265) + std::vector ::const_iterator _iter269; + for (_iter269 = this->max_values.begin(); _iter269 != this->max_values.end(); ++_iter269) { - xfer += oprot->writeBinary((*_iter265)); + xfer += oprot->writeBinary((*_iter269)); } xfer += oprot->writeListEnd(); } @@ -7340,10 +7439,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* 
oprot) const xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter266; - for (_iter266 = this->null_counts.begin(); _iter266 != this->null_counts.end(); ++_iter266) + std::vector ::const_iterator _iter270; + for (_iter270 = this->null_counts.begin(); _iter270 != this->null_counts.end(); ++_iter270) { - xfer += oprot->writeI64((*_iter266)); + xfer += oprot->writeI64((*_iter270)); } xfer += oprot->writeListEnd(); } @@ -7364,38 +7463,38 @@ void swap(ColumnIndex &a, ColumnIndex &b) { swap(a.__isset, b.__isset); } -ColumnIndex::ColumnIndex(const ColumnIndex& other267) { - null_pages = other267.null_pages; - min_values = other267.min_values; - max_values = other267.max_values; - boundary_order = other267.boundary_order; - null_counts = other267.null_counts; - __isset = other267.__isset; -} -ColumnIndex::ColumnIndex(ColumnIndex&& other268) noexcept { - null_pages = std::move(other268.null_pages); - min_values = std::move(other268.min_values); - max_values = std::move(other268.max_values); - boundary_order = other268.boundary_order; - null_counts = std::move(other268.null_counts); - __isset = other268.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other269) { - null_pages = other269.null_pages; - min_values = other269.min_values; - max_values = other269.max_values; - boundary_order = other269.boundary_order; - null_counts = other269.null_counts; - __isset = other269.__isset; +ColumnIndex::ColumnIndex(const ColumnIndex& other271) { + null_pages = other271.null_pages; + min_values = other271.min_values; + max_values = other271.max_values; + boundary_order = other271.boundary_order; + null_counts = other271.null_counts; + __isset = other271.__isset; +} +ColumnIndex::ColumnIndex(ColumnIndex&& other272) noexcept { + null_pages = std::move(other272.null_pages); + min_values = 
std::move(other272.min_values); + max_values = std::move(other272.max_values); + boundary_order = other272.boundary_order; + null_counts = std::move(other272.null_counts); + __isset = other272.__isset; +} +ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other273) { + null_pages = other273.null_pages; + min_values = other273.min_values; + max_values = other273.max_values; + boundary_order = other273.boundary_order; + null_counts = other273.null_counts; + __isset = other273.__isset; return *this; } -ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other270) noexcept { - null_pages = std::move(other270.null_pages); - min_values = std::move(other270.min_values); - max_values = std::move(other270.max_values); - boundary_order = other270.boundary_order; - null_counts = std::move(other270.null_counts); - __isset = other270.__isset; +ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other274) noexcept { + null_pages = std::move(other274.null_pages); + min_values = std::move(other274.min_values); + max_values = std::move(other274.max_values); + boundary_order = other274.boundary_order; + null_counts = std::move(other274.null_counts); + __isset = other274.__isset; return *this; } void ColumnIndex::printTo(std::ostream& out) const { @@ -7525,30 +7624,30 @@ void swap(AesGcmV1 &a, AesGcmV1 &b) { swap(a.__isset, b.__isset); } -AesGcmV1::AesGcmV1(const AesGcmV1& other271) { - aad_prefix = other271.aad_prefix; - aad_file_unique = other271.aad_file_unique; - supply_aad_prefix = other271.supply_aad_prefix; - __isset = other271.__isset; +AesGcmV1::AesGcmV1(const AesGcmV1& other275) { + aad_prefix = other275.aad_prefix; + aad_file_unique = other275.aad_file_unique; + supply_aad_prefix = other275.supply_aad_prefix; + __isset = other275.__isset; } -AesGcmV1::AesGcmV1(AesGcmV1&& other272) noexcept { - aad_prefix = std::move(other272.aad_prefix); - aad_file_unique = std::move(other272.aad_file_unique); - supply_aad_prefix = other272.supply_aad_prefix; - __isset = other272.__isset; 
+AesGcmV1::AesGcmV1(AesGcmV1&& other276) noexcept { + aad_prefix = std::move(other276.aad_prefix); + aad_file_unique = std::move(other276.aad_file_unique); + supply_aad_prefix = other276.supply_aad_prefix; + __isset = other276.__isset; } -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other273) { - aad_prefix = other273.aad_prefix; - aad_file_unique = other273.aad_file_unique; - supply_aad_prefix = other273.supply_aad_prefix; - __isset = other273.__isset; +AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other277) { + aad_prefix = other277.aad_prefix; + aad_file_unique = other277.aad_file_unique; + supply_aad_prefix = other277.supply_aad_prefix; + __isset = other277.__isset; return *this; } -AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other274) noexcept { - aad_prefix = std::move(other274.aad_prefix); - aad_file_unique = std::move(other274.aad_file_unique); - supply_aad_prefix = other274.supply_aad_prefix; - __isset = other274.__isset; +AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other278) noexcept { + aad_prefix = std::move(other278.aad_prefix); + aad_file_unique = std::move(other278.aad_file_unique); + supply_aad_prefix = other278.supply_aad_prefix; + __isset = other278.__isset; return *this; } void AesGcmV1::printTo(std::ostream& out) const { @@ -7676,30 +7775,30 @@ void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { swap(a.__isset, b.__isset); } -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other275) { - aad_prefix = other275.aad_prefix; - aad_file_unique = other275.aad_file_unique; - supply_aad_prefix = other275.supply_aad_prefix; - __isset = other275.__isset; +AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other279) { + aad_prefix = other279.aad_prefix; + aad_file_unique = other279.aad_file_unique; + supply_aad_prefix = other279.supply_aad_prefix; + __isset = other279.__isset; } -AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other276) noexcept { - aad_prefix = std::move(other276.aad_prefix); - aad_file_unique = std::move(other276.aad_file_unique); - supply_aad_prefix = 
other276.supply_aad_prefix; - __isset = other276.__isset; +AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other280) noexcept { + aad_prefix = std::move(other280.aad_prefix); + aad_file_unique = std::move(other280.aad_file_unique); + supply_aad_prefix = other280.supply_aad_prefix; + __isset = other280.__isset; } -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other277) { - aad_prefix = other277.aad_prefix; - aad_file_unique = other277.aad_file_unique; - supply_aad_prefix = other277.supply_aad_prefix; - __isset = other277.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other281) { + aad_prefix = other281.aad_prefix; + aad_file_unique = other281.aad_file_unique; + supply_aad_prefix = other281.supply_aad_prefix; + __isset = other281.__isset; return *this; } -AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other278) noexcept { - aad_prefix = std::move(other278.aad_prefix); - aad_file_unique = std::move(other278.aad_file_unique); - supply_aad_prefix = other278.supply_aad_prefix; - __isset = other278.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other282) noexcept { + aad_prefix = std::move(other282.aad_prefix); + aad_file_unique = std::move(other282.aad_file_unique); + supply_aad_prefix = other282.supply_aad_prefix; + __isset = other282.__isset; return *this; } void AesGcmCtrV1::printTo(std::ostream& out) const { @@ -7808,26 +7907,26 @@ void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { swap(a.__isset, b.__isset); } -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other279) { - AES_GCM_V1 = other279.AES_GCM_V1; - AES_GCM_CTR_V1 = other279.AES_GCM_CTR_V1; - __isset = other279.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other283) { + AES_GCM_V1 = other283.AES_GCM_V1; + AES_GCM_CTR_V1 = other283.AES_GCM_CTR_V1; + __isset = other283.__isset; } -EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other280) noexcept { - AES_GCM_V1 = std::move(other280.AES_GCM_V1); - 
AES_GCM_CTR_V1 = std::move(other280.AES_GCM_CTR_V1); - __isset = other280.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other284) noexcept { + AES_GCM_V1 = std::move(other284.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other284.AES_GCM_CTR_V1); + __isset = other284.__isset; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other281) { - AES_GCM_V1 = other281.AES_GCM_V1; - AES_GCM_CTR_V1 = other281.AES_GCM_CTR_V1; - __isset = other281.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other285) { + AES_GCM_V1 = other285.AES_GCM_V1; + AES_GCM_CTR_V1 = other285.AES_GCM_CTR_V1; + __isset = other285.__isset; return *this; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other282) noexcept { - AES_GCM_V1 = std::move(other282.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other282.AES_GCM_CTR_V1); - __isset = other282.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other286) noexcept { + AES_GCM_V1 = std::move(other286.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other286.AES_GCM_CTR_V1); + __isset = other286.__isset; return *this; } void EncryptionAlgorithm::printTo(std::ostream& out) const { @@ -7927,14 +8026,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->schema.clear(); - uint32_t _size283; - ::apache::thrift::protocol::TType _etype286; - xfer += iprot->readListBegin(_etype286, _size283); - this->schema.resize(_size283); - uint32_t _i287; - for (_i287 = 0; _i287 < _size283; ++_i287) + uint32_t _size287; + ::apache::thrift::protocol::TType _etype290; + xfer += iprot->readListBegin(_etype290, _size287); + this->schema.resize(_size287); + uint32_t _i291; + for (_i291 = 0; _i291 < _size287; ++_i291) { - xfer += this->schema[_i287].read(iprot); + xfer += this->schema[_i291].read(iprot); } xfer += iprot->readListEnd(); } @@ -7955,14 
+8054,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->row_groups.clear(); - uint32_t _size288; - ::apache::thrift::protocol::TType _etype291; - xfer += iprot->readListBegin(_etype291, _size288); - this->row_groups.resize(_size288); - uint32_t _i292; - for (_i292 = 0; _i292 < _size288; ++_i292) + uint32_t _size292; + ::apache::thrift::protocol::TType _etype295; + xfer += iprot->readListBegin(_etype295, _size292); + this->row_groups.resize(_size292); + uint32_t _i296; + for (_i296 = 0; _i296 < _size292; ++_i296) { - xfer += this->row_groups[_i292].read(iprot); + xfer += this->row_groups[_i296].read(iprot); } xfer += iprot->readListEnd(); } @@ -7975,14 +8074,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size293; - ::apache::thrift::protocol::TType _etype296; - xfer += iprot->readListBegin(_etype296, _size293); - this->key_value_metadata.resize(_size293); - uint32_t _i297; - for (_i297 = 0; _i297 < _size293; ++_i297) + uint32_t _size297; + ::apache::thrift::protocol::TType _etype300; + xfer += iprot->readListBegin(_etype300, _size297); + this->key_value_metadata.resize(_size297); + uint32_t _i301; + for (_i301 = 0; _i301 < _size297; ++_i301) { - xfer += this->key_value_metadata[_i297].read(iprot); + xfer += this->key_value_metadata[_i301].read(iprot); } xfer += iprot->readListEnd(); } @@ -8003,14 +8102,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->column_orders.clear(); - uint32_t _size298; - ::apache::thrift::protocol::TType _etype301; - xfer += iprot->readListBegin(_etype301, _size298); - this->column_orders.resize(_size298); - uint32_t _i302; - for (_i302 = 0; _i302 < _size298; ++_i302) + uint32_t _size302; + ::apache::thrift::protocol::TType 
_etype305; + xfer += iprot->readListBegin(_etype305, _size302); + this->column_orders.resize(_size302); + uint32_t _i306; + for (_i306 = 0; _i306 < _size302; ++_i306) { - xfer += this->column_orders[_i302].read(iprot); + xfer += this->column_orders[_i306].read(iprot); } xfer += iprot->readListEnd(); } @@ -8067,10 +8166,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter303; - for (_iter303 = this->schema.begin(); _iter303 != this->schema.end(); ++_iter303) + std::vector ::const_iterator _iter307; + for (_iter307 = this->schema.begin(); _iter307 != this->schema.end(); ++_iter307) { - xfer += (*_iter303).write(oprot); + xfer += (*_iter307).write(oprot); } xfer += oprot->writeListEnd(); } @@ -8083,10 +8182,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter304; - for (_iter304 = this->row_groups.begin(); _iter304 != this->row_groups.end(); ++_iter304) + std::vector ::const_iterator _iter308; + for (_iter308 = this->row_groups.begin(); _iter308 != this->row_groups.end(); ++_iter308) { - xfer += (*_iter304).write(oprot); + xfer += (*_iter308).write(oprot); } xfer += oprot->writeListEnd(); } @@ -8096,10 +8195,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter305; - for (_iter305 
= this->key_value_metadata.begin(); _iter305 != this->key_value_metadata.end(); ++_iter305) + std::vector ::const_iterator _iter309; + for (_iter309 = this->key_value_metadata.begin(); _iter309 != this->key_value_metadata.end(); ++_iter309) { - xfer += (*_iter305).write(oprot); + xfer += (*_iter309).write(oprot); } xfer += oprot->writeListEnd(); } @@ -8114,10 +8213,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter306; - for (_iter306 = this->column_orders.begin(); _iter306 != this->column_orders.end(); ++_iter306) + std::vector ::const_iterator _iter310; + for (_iter310 = this->column_orders.begin(); _iter310 != this->column_orders.end(); ++_iter310) { - xfer += (*_iter306).write(oprot); + xfer += (*_iter310).write(oprot); } xfer += oprot->writeListEnd(); } @@ -8152,54 +8251,54 @@ void swap(FileMetaData &a, FileMetaData &b) { swap(a.__isset, b.__isset); } -FileMetaData::FileMetaData(const FileMetaData& other307) { - version = other307.version; - schema = other307.schema; - num_rows = other307.num_rows; - row_groups = other307.row_groups; - key_value_metadata = other307.key_value_metadata; - created_by = other307.created_by; - column_orders = other307.column_orders; - encryption_algorithm = other307.encryption_algorithm; - footer_signing_key_metadata = other307.footer_signing_key_metadata; - __isset = other307.__isset; -} -FileMetaData::FileMetaData(FileMetaData&& other308) noexcept { - version = other308.version; - schema = std::move(other308.schema); - num_rows = other308.num_rows; - row_groups = std::move(other308.row_groups); - key_value_metadata = std::move(other308.key_value_metadata); - created_by = std::move(other308.created_by); - column_orders = std::move(other308.column_orders); - 
encryption_algorithm = std::move(other308.encryption_algorithm); - footer_signing_key_metadata = std::move(other308.footer_signing_key_metadata); - __isset = other308.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other309) { - version = other309.version; - schema = other309.schema; - num_rows = other309.num_rows; - row_groups = other309.row_groups; - key_value_metadata = other309.key_value_metadata; - created_by = other309.created_by; - column_orders = other309.column_orders; - encryption_algorithm = other309.encryption_algorithm; - footer_signing_key_metadata = other309.footer_signing_key_metadata; - __isset = other309.__isset; +FileMetaData::FileMetaData(const FileMetaData& other311) { + version = other311.version; + schema = other311.schema; + num_rows = other311.num_rows; + row_groups = other311.row_groups; + key_value_metadata = other311.key_value_metadata; + created_by = other311.created_by; + column_orders = other311.column_orders; + encryption_algorithm = other311.encryption_algorithm; + footer_signing_key_metadata = other311.footer_signing_key_metadata; + __isset = other311.__isset; +} +FileMetaData::FileMetaData(FileMetaData&& other312) noexcept { + version = other312.version; + schema = std::move(other312.schema); + num_rows = other312.num_rows; + row_groups = std::move(other312.row_groups); + key_value_metadata = std::move(other312.key_value_metadata); + created_by = std::move(other312.created_by); + column_orders = std::move(other312.column_orders); + encryption_algorithm = std::move(other312.encryption_algorithm); + footer_signing_key_metadata = std::move(other312.footer_signing_key_metadata); + __isset = other312.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other313) { + version = other313.version; + schema = other313.schema; + num_rows = other313.num_rows; + row_groups = other313.row_groups; + key_value_metadata = other313.key_value_metadata; + created_by = other313.created_by; + column_orders = 
other313.column_orders; + encryption_algorithm = other313.encryption_algorithm; + footer_signing_key_metadata = other313.footer_signing_key_metadata; + __isset = other313.__isset; return *this; } -FileMetaData& FileMetaData::operator=(FileMetaData&& other310) noexcept { - version = other310.version; - schema = std::move(other310.schema); - num_rows = other310.num_rows; - row_groups = std::move(other310.row_groups); - key_value_metadata = std::move(other310.key_value_metadata); - created_by = std::move(other310.created_by); - column_orders = std::move(other310.column_orders); - encryption_algorithm = std::move(other310.encryption_algorithm); - footer_signing_key_metadata = std::move(other310.footer_signing_key_metadata); - __isset = other310.__isset; +FileMetaData& FileMetaData::operator=(FileMetaData&& other314) noexcept { + version = other314.version; + schema = std::move(other314.schema); + num_rows = other314.num_rows; + row_groups = std::move(other314.row_groups); + key_value_metadata = std::move(other314.key_value_metadata); + created_by = std::move(other314.created_by); + column_orders = std::move(other314.column_orders); + encryption_algorithm = std::move(other314.encryption_algorithm); + footer_signing_key_metadata = std::move(other314.footer_signing_key_metadata); + __isset = other314.__isset; return *this; } void FileMetaData::printTo(std::ostream& out) const { @@ -8315,26 +8414,26 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { swap(a.__isset, b.__isset); } -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other311) { - encryption_algorithm = other311.encryption_algorithm; - key_metadata = other311.key_metadata; - __isset = other311.__isset; +FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other315) { + encryption_algorithm = other315.encryption_algorithm; + key_metadata = other315.key_metadata; + __isset = other315.__isset; } -FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other312) noexcept { - 
encryption_algorithm = std::move(other312.encryption_algorithm); - key_metadata = std::move(other312.key_metadata); - __isset = other312.__isset; +FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other316) noexcept { + encryption_algorithm = std::move(other316.encryption_algorithm); + key_metadata = std::move(other316.key_metadata); + __isset = other316.__isset; } -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other313) { - encryption_algorithm = other313.encryption_algorithm; - key_metadata = other313.key_metadata; - __isset = other313.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other317) { + encryption_algorithm = other317.encryption_algorithm; + key_metadata = other317.key_metadata; + __isset = other317.__isset; return *this; } -FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other314) noexcept { - encryption_algorithm = std::move(other314.encryption_algorithm); - key_metadata = std::move(other314.key_metadata); - __isset = other314.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other318) noexcept { + encryption_algorithm = std::move(other318.encryption_algorithm); + key_metadata = std::move(other318.key_metadata); + __isset = other318.__isset; return *this; } void FileCryptoMetaData::printTo(std::ostream& out) const { diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 9f468b5051db3..199b4ae747667 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -359,6 +359,8 @@ class EnumType; class DateType; +class Float16Type; + class NullType; class DecimalType; @@ -770,6 +772,39 @@ void swap(DateType &a, DateType &b); std::ostream& operator<<(std::ostream& out, const DateType& obj); +class Float16Type : public virtual ::apache::thrift::TBase { + public: + + Float16Type(const Float16Type&) noexcept; + Float16Type(Float16Type&&) noexcept; + Float16Type& 
operator=(const Float16Type&) noexcept; + Float16Type& operator=(Float16Type&&) noexcept; + Float16Type() noexcept { + } + + virtual ~Float16Type() noexcept; + + bool operator == (const Float16Type & /* rhs */) const + { + return true; + } + bool operator != (const Float16Type &rhs) const { + return !(*this == rhs); + } + + bool operator < (const Float16Type & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(Float16Type &a, Float16Type &b); + +std::ostream& operator<<(std::ostream& out, const Float16Type& obj); + + /** * Logical type to annotate a column that is always null. * @@ -1253,7 +1288,7 @@ void swap(BsonType &a, BsonType &b); std::ostream& operator<<(std::ostream& out, const BsonType& obj); typedef struct _LogicalType__isset { - _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false) {} + _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false), FLOAT16(false) {} bool STRING :1; bool MAP :1; bool LIST :1; @@ -1267,6 +1302,7 @@ typedef struct _LogicalType__isset { bool JSON :1; bool BSON :1; bool UUID :1; + bool FLOAT16 :1; } _LogicalType__isset; /** @@ -1300,6 +1336,7 @@ class LogicalType : public virtual ::apache::thrift::TBase { JsonType JSON; BsonType BSON; UUIDType UUID; + Float16Type FLOAT16; _LogicalType__isset __isset; @@ -1329,6 +1366,8 @@ class LogicalType : public virtual ::apache::thrift::TBase { void __set_UUID(const UUIDType& val); + void __set_FLOAT16(const Float16Type& val); + bool operator == (const LogicalType & rhs) const { if (__isset.STRING != rhs.__isset.STRING) 
@@ -1383,6 +1422,10 @@ class LogicalType : public virtual ::apache::thrift::TBase { return false; else if (__isset.UUID && !(UUID == rhs.UUID)) return false; + if (__isset.FLOAT16 != rhs.__isset.FLOAT16) + return false; + else if (__isset.FLOAT16 && !(FLOAT16 == rhs.FLOAT16)) + return false; return true; } bool operator != (const LogicalType &rhs) const { diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index 88e44c96cc24c..d802166be66e8 100644 --- a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -234,6 +234,7 @@ struct MapType {} // see LogicalTypes.md struct ListType {} // see LogicalTypes.md struct EnumType {} // allowed for BINARY, must be encoded with UTF-8 struct DateType {} // allowed for INT32 +struct Float16Type{} // allowed for FIXED[2], must encode raw FLOAT16 bytes /** * Logical type to annotate a column that is always null. @@ -344,6 +345,7 @@ union LogicalType { 12: JsonType JSON // use ConvertedType JSON 13: BsonType BSON // use ConvertedType BSON 14: UUIDType UUID // no compatible ConvertedType + 15: Float16Type FLOAT16 // no compatible ConvertedType } /** From 0909cd14f3bc0cbadd2ce4f66cbde3a0fbe205b2 Mon Sep 17 00:00:00 2001 From: benibus Date: Wed, 14 Jun 2023 14:50:15 -0400 Subject: [PATCH 02/37] Implement LogicalType class --- cpp/src/parquet/types.cc | 26 ++++++++++++++++++++++++++ cpp/src/parquet/types.h | 13 +++++++++++++ 2 files changed, 39 insertions(+) diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 3127b60e5d1ae..04a0fc2e0117b 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -441,6 +441,8 @@ std::shared_ptr LogicalType::FromThrift( return BSONLogicalType::Make(); } else if (type.__isset.UUID) { return UUIDLogicalType::Make(); + } else if (type.__isset.FLOAT16) { + return Float16LogicalType::Make(); } else { throw ParquetException("Metadata contains Thrift LogicalType that is not recognized"); } @@ -494,6 +496,10 @@ std::shared_ptr 
LogicalType::BSON() { return BSONLogicalType: std::shared_ptr LogicalType::UUID() { return UUIDLogicalType::Make(); } +std::shared_ptr LogicalType::Float16() { + return Float16LogicalType::Make(); +} + std::shared_ptr LogicalType::None() { return NoLogicalType::Make(); } /* @@ -575,6 +581,7 @@ class LogicalType::Impl { class JSON; class BSON; class UUID; + class Float16; class No; class Undefined; @@ -644,6 +651,9 @@ bool LogicalType::is_null() const { return impl_->type() == LogicalType::Type::N bool LogicalType::is_JSON() const { return impl_->type() == LogicalType::Type::JSON; } bool LogicalType::is_BSON() const { return impl_->type() == LogicalType::Type::BSON; } bool LogicalType::is_UUID() const { return impl_->type() == LogicalType::Type::UUID; } +bool LogicalType::is_float16() const { + return impl_->type() == LogicalType::Type::FLOAT16; +} bool LogicalType::is_none() const { return impl_->type() == LogicalType::Type::NONE; } bool LogicalType::is_valid() const { return impl_->type() != LogicalType::Type::UNDEFINED; @@ -1557,6 +1567,22 @@ class LogicalType::Impl::UUID final : public LogicalType::Impl::Incompatible, GENERATE_MAKE(UUID) +class LogicalType::Impl::Float16 final : public LogicalType::Impl::Incompatible, + public LogicalType::Impl::TypeLengthApplicable { + public: + friend class Float16LogicalType; + + OVERRIDE_TOSTRING(Float16) + OVERRIDE_TOTHRIFT(Float16Type, FLOAT16) + + private: + Float16() + : LogicalType::Impl(LogicalType::Type::FLOAT16, SortOrder::SIGNED), + LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 2) {} +}; + +GENERATE_MAKE(Float16) + class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible, public LogicalType::Impl::UniversalApplicable { public: diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index 0315376a883e9..76dd0efc7cb4a 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -157,6 +157,7 @@ class PARQUET_EXPORT LogicalType { JSON, BSON, UUID, + 
FLOAT16, NONE // Not a real logical type; should always be last element }; }; @@ -210,6 +211,7 @@ class PARQUET_EXPORT LogicalType { static std::shared_ptr JSON(); static std::shared_ptr BSON(); static std::shared_ptr UUID(); + static std::shared_ptr Float16(); /// \brief Create a placeholder for when no logical type is specified static std::shared_ptr None(); @@ -263,6 +265,7 @@ class PARQUET_EXPORT LogicalType { bool is_JSON() const; bool is_BSON() const; bool is_UUID() const; + bool is_float16() const; bool is_none() const; /// \brief Return true if this logical type is of a known type. bool is_valid() const; @@ -433,6 +436,16 @@ class PARQUET_EXPORT UUIDLogicalType : public LogicalType { UUIDLogicalType() = default; }; +/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 2, +/// must encode raw FLOAT16 bytes. +class PARQUET_EXPORT Float16LogicalType : public LogicalType { + public: + static std::shared_ptr Make(); + + private: + Float16LogicalType() = default; +}; + /// \brief Allowed for any physical type. class PARQUET_EXPORT NoLogicalType : public LogicalType { public: From 31135737ade20e40bc79594b4117d9fb77fdf796 Mon Sep 17 00:00:00 2001 From: benibus Date: Wed, 14 Jun 2023 14:52:55 -0400 Subject: [PATCH 03/37] Implement column statistics --- cpp/src/parquet/float_internal.h | 61 +++++ cpp/src/parquet/statistics.cc | 144 ++++++++++-- cpp/src/parquet/statistics_test.cc | 343 +++++++++++++++++++++-------- 3 files changed, 442 insertions(+), 106 deletions(-) create mode 100644 cpp/src/parquet/float_internal.h diff --git a/cpp/src/parquet/float_internal.h b/cpp/src/parquet/float_internal.h new file mode 100644 index 0000000000000..c82c9d575ce3b --- /dev/null +++ b/cpp/src/parquet/float_internal.h @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/util/bit_util.h" +#include "arrow/util/ubsan.h" +#include "parquet/types.h" + +namespace parquet { + +struct float16 { + constexpr static uint16_t min() { return 0b1111101111111111; } + constexpr static uint16_t max() { return 0b0111101111111111; } + constexpr static uint16_t positive_zero() { return 0b0000000000000000; } + constexpr static uint16_t negative_zero() { return 0b1000000000000000; } + + static uint8_t* min_ptr() { return min_; } + static uint8_t* max_ptr() { return max_; } + static uint8_t* positive_zero_ptr() { return positive_zero_; } + static uint8_t* negative_zero_ptr() { return negative_zero_; } + + static bool is_nan(uint16_t n) { return (n & 0x7c00) == 0x7c00 && (n & 0x03ff) != 0; } + static bool is_zero(uint16_t n) { return (n & 0x7fff) == 0; } + static bool signbit(uint16_t n) { return (n & 0x8000) != 0; } + + static uint16_t Pack(const uint8_t* src) { + return ::arrow::bit_util::FromLittleEndian(::arrow::util::SafeLoadAs(src)); + } + static uint16_t Pack(const FLBA& src) { return Pack(src.ptr); } + + static uint8_t* Unpack(uint16_t src, uint8_t* dest) { + src = ::arrow::bit_util::ToLittleEndian(src); + return static_cast(std::memcpy(dest, &src, sizeof(src))); + } + + private: + static inline uint8_t min_[] = {0b11111111, 0b11111011}; + static inline uint8_t max_[] = {0b11111111, 0b01111011}; + static inline 
uint8_t positive_zero_[] = {0b00000000, 0b00000000}; + static inline uint8_t negative_zero_[] = {0b00000000, 0b10000000}; +}; + +} // namespace parquet diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index ccfb69c487d40..a3a486539710e 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -35,6 +35,7 @@ #include "arrow/visit_data_inline.h" #include "parquet/encoding.h" #include "parquet/exception.h" +#include "parquet/float_internal.h" #include "parquet/platform.h" #include "parquet/schema.h" @@ -277,11 +278,54 @@ template struct CompareHelper : public BinaryLikeCompareHelperBase {}; +struct Float16CompareHelper { + using T = FLBA; + + static T DefaultMin() { return T{float16::max_ptr()}; } + static T DefaultMax() { return T{float16::min_ptr()}; } + + static T Coalesce(T val, T fallback) { + return val.ptr != nullptr && float16::is_nan(float16::Pack(val)) ? fallback : val; + } + + static inline bool Compare(int type_length, const T& a, const T& b) { + uint16_t l = float16::Pack(a); + uint16_t r = float16::Pack(b); + + if (l & 0x8000) { + if (r & 0x8000) { + // Both are negative + return (l & 0x7fff) > (r & 0x7fff); + } else { + // Handle +/-0 + return (l & 0x7fff) || r != 0; + } + } else if (r & 0x8000) { + return false; + } else { + // Both are positive + return (l & 0x7fff) < (r & 0x7fff); + } + } + + static T Min(int type_length, const T& a, const T& b) { + if (a.ptr == nullptr) return b; + if (b.ptr == nullptr) return a; + return Compare(type_length, a, b) ? a : b; + } + + static T Max(int type_length, const T& a, const T& b) { + if (a.ptr == nullptr) return b; + if (b.ptr == nullptr) return a; + return Compare(type_length, a, b) ? 
b : a; + } +}; + using ::std::optional; template ::arrow::enable_if_t::value, optional>> -CleanStatistic(std::pair min_max) { +CleanStatistic(std::pair min_max, LogicalType::Type::type) { return min_max; } @@ -292,7 +336,7 @@ CleanStatistic(std::pair min_max) { // - If max is -0.0f, replace with 0.0f template ::arrow::enable_if_t::value, optional>> -CleanStatistic(std::pair min_max) { +CleanStatistic(std::pair min_max, LogicalType::Type::type) { T min = min_max.first; T max = min_max.second; @@ -318,26 +362,55 @@ CleanStatistic(std::pair min_max) { return {{min, max}}; } -optional> CleanStatistic(std::pair min_max) { +optional> CleanFloat16Statistic(std::pair min_max) { + FLBA min = min_max.first; + FLBA max = min_max.second; + uint16_t min_packed = float16::Pack(min); + uint16_t max_packed = float16::Pack(max); + + if (float16::is_nan(min_packed) || float16::is_nan(max_packed)) { + return ::std::nullopt; + } + + if (min_packed == float16::max() && max_packed == float16::min()) { + return ::std::nullopt; + } + + if (min_packed == float16::positive_zero()) { + min = FLBA{float16::negative_zero_ptr()}; + } + if (max_packed == float16::negative_zero()) { + max = FLBA{float16::positive_zero_ptr()}; + } + + return {{min, max}}; +} + +optional> CleanStatistic(std::pair min_max, + LogicalType::Type::type logical_type) { if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) { return ::std::nullopt; } + if (logical_type == LogicalType::Type::FLOAT16) { + return CleanFloat16Statistic(std::move(min_max)); + } return min_max; } optional> CleanStatistic( - std::pair min_max) { + std::pair min_max, LogicalType::Type::type) { if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) { return ::std::nullopt; } return min_max; } -template +template > class TypedComparatorImpl : virtual public TypedComparator { public: using T = typename DType::c_type; - using Helper = CompareHelper; + using Helper = HelperType; explicit TypedComparatorImpl(int type_length = 
-1) : type_length_(type_length) {} @@ -412,9 +485,9 @@ TypedComparatorImpl::GetMinMax(const int32_t* va return {SafeCopy(min), SafeCopy(max)}; } -template +template std::pair -TypedComparatorImpl::GetMinMax(const ::arrow::Array& values) { +TypedComparatorImpl::GetMinMax(const ::arrow::Array& values) { ParquetException::NYI(values.type()->ToString()); } @@ -458,6 +531,16 @@ std::pair TypedComparatorImpl::GetMi return GetMinMaxBinaryHelper(*this, values); } +static LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) { + if (const auto& logical_type = descr->logical_type()) { + return logical_type->type(); + } + return LogicalType::Type::NONE; +} +static LogicalType::Type::type LogicalTypeId(const Statistics& stats) { + return LogicalTypeId(stats.descr()); +} + template class TypedStatisticsImpl : public TypedStatistics { public: @@ -469,8 +552,7 @@ class TypedStatisticsImpl : public TypedStatistics { pool_(pool), min_buffer_(AllocateBuffer(pool_, 0)), max_buffer_(AllocateBuffer(pool_, 0)) { - auto comp = Comparator::Make(descr); - comparator_ = std::static_pointer_cast>(comp); + comparator_ = MakeComparator(descr); TypedStatisticsImpl::Reset(); } @@ -530,6 +612,19 @@ class TypedStatisticsImpl : public TypedStatistics { bool Equals(const Statistics& raw_other) const override { if (physical_type() != raw_other.physical_type()) return false; + const auto logical_id = LogicalTypeId(*this); + switch (logical_id) { + // Only compare against logical types that influence the interpretation of the + // physical type + case LogicalType::Type::FLOAT16: + if (LogicalTypeId(raw_other) != logical_id) { + return false; + } + break; + default: + break; + } + const auto& other = checked_cast(raw_other); if (has_min_max_ != other.has_min_max_) return false; @@ -686,7 +781,7 @@ class TypedStatisticsImpl : public TypedStatistics { void SetMinMaxPair(std::pair min_max) { // CleanStatistic can return a nullopt in case of erroneous values, e.g. 
NaN - auto maybe_min_max = CleanStatistic(min_max); + auto maybe_min_max = CleanStatistic(min_max, LogicalTypeId(*this)); if (!maybe_min_max) return; auto min = maybe_min_max.value().first; @@ -795,12 +890,8 @@ void TypedStatisticsImpl::PlainDecode(const std::string& src, dst->ptr = reinterpret_cast(src.c_str()); } -} // namespace - -// ---------------------------------------------------------------------- -// Public factory functions - -std::shared_ptr Comparator::Make(Type::type physical_type, +std::shared_ptr DoMakeComparator(Type::type physical_type, + LogicalType::Type::type logical_type, SortOrder::type sort_order, int type_length) { if (SortOrder::SIGNED == sort_order) { @@ -820,6 +911,10 @@ std::shared_ptr Comparator::Make(Type::type physical_type, case Type::BYTE_ARRAY: return std::make_shared>(); case Type::FIXED_LEN_BYTE_ARRAY: + if (logical_type == LogicalType::Type::FLOAT16) { + return std::make_shared< + TypedComparatorImpl>(); + } return std::make_shared>(type_length); default: ParquetException::NYI("Signed Compare not implemented"); @@ -845,8 +940,21 @@ std::shared_ptr Comparator::Make(Type::type physical_type, return nullptr; } +} // namespace + +// ---------------------------------------------------------------------- +// Public factory functions + +std::shared_ptr Comparator::Make(Type::type physical_type, + SortOrder::type sort_order, + int type_length) { + return DoMakeComparator(physical_type, LogicalType::Type::NONE, sort_order, + type_length); +} + std::shared_ptr Comparator::Make(const ColumnDescriptor* descr) { - return Make(descr->physical_type(), descr->sort_order(), descr->type_length()); + return DoMakeComparator(descr->physical_type(), LogicalTypeId(descr), + descr->sort_order(), descr->type_length()); } std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index 637832945ec57..4dc77e771bded 100644 --- a/cpp/src/parquet/statistics_test.cc 
+++ b/cpp/src/parquet/statistics_test.cc @@ -40,6 +40,7 @@ #include "parquet/column_writer.h" #include "parquet/file_reader.h" #include "parquet/file_writer.h" +#include "parquet/float_internal.h" #include "parquet/platform.h" #include "parquet/schema.h" #include "parquet/statistics.h" @@ -875,9 +876,22 @@ TEST(CorrectStatistics, Basics) { // Test SortOrder class static const int NUM_VALUES = 10; -template +template +struct RebindLogical { + using ParquetType = T; + using CType = typename T::c_type; +}; + +template <> +struct RebindLogical { + using ParquetType = FLBAType; + using CType = ParquetType::c_type; +}; + +template class TestStatisticsSortOrder : public ::testing::Test { public: + using TestType = typename RebindLogical::ParquetType; using c_type = typename TestType::c_type; void SetUp() override { @@ -955,7 +969,7 @@ class TestStatisticsSortOrder : public ::testing::Test { }; using CompareTestTypes = ::testing::Types; + ByteArrayType, FLBAType, Float16LogicalType>; // TYPE::INT32 template <> @@ -1102,6 +1116,36 @@ void TestStatisticsSortOrder::SetValues() { .set_max(std::string(reinterpret_cast(&vals[8][0]), FLBA_LENGTH)); } +template <> +void TestStatisticsSortOrder::AddNodes(std::string name) { + auto node = + schema::PrimitiveNode::Make(name, Repetition::REQUIRED, LogicalType::Float16(), + Type::FIXED_LEN_BYTE_ARRAY, sizeof(uint16_t)); + fields_.push_back(std::move(node)); +} + +template <> +void TestStatisticsSortOrder::SetValues() { + constexpr int kValueLen = 2; + constexpr int kNumBytes = NUM_VALUES * kValueLen; + + const uint16_t packed_vals[NUM_VALUES] = { + 0b0000000000000000, 0b0000000000000000, 0b1000000000000000, 0b1000010000000000, + 0b0111110000001000, 0b1000000000000000, 0b0000010000000000, 0b0000000001000000, + 0b1111110000001000, 0b1000000001000000}; + + values_buf_.resize(kNumBytes); + uint8_t* ptr = values_buf_.data(); + for (int i = 0; i < NUM_VALUES; ++i) { + values_[i].ptr = float16::Unpack(packed_vals[i], ptr); + ptr += kValueLen; 
+ } + + stats_[0] + .set_min(std::string(reinterpret_cast(values_[3].ptr), kValueLen)) + .set_max(std::string(reinterpret_cast(values_[6].ptr), kValueLen)); +} + TYPED_TEST_SUITE(TestStatisticsSortOrder, CompareTestTypes); TYPED_TEST(TestStatisticsSortOrder, MinMax) { @@ -1167,12 +1211,20 @@ TEST_F(TestStatisticsSortOrderFLBA, UnknownSortOrder) { ASSERT_FALSE(cc_metadata->is_stats_set()); } +template +static std::string EncodeValue(const T& val) { + return std::string(reinterpret_cast(&val), sizeof(val)); +} +static std::string EncodeValue(const FLBA& val, int length = sizeof(uint16_t)) { + return std::string(reinterpret_cast(val.ptr), length); +} + template void AssertMinMaxAre(Stats stats, const Array& values, T expected_min, T expected_max) { stats->Update(values.data(), values.size(), 0); ASSERT_TRUE(stats->HasMinMax()); - EXPECT_EQ(stats->min(), expected_min); - EXPECT_EQ(stats->max(), expected_max); + EXPECT_EQ(stats->EncodeMin(), EncodeValue(expected_min)); + EXPECT_EQ(stats->EncodeMax(), EncodeValue(expected_max)); } template @@ -1184,8 +1236,8 @@ void AssertMinMaxAre(Stats stats, const Array& values, const uint8_t* valid_bitm stats->UpdateSpaced(values.data(), valid_bitmap, 0, non_null_count + null_count, non_null_count, null_count); ASSERT_TRUE(stats->HasMinMax()); - EXPECT_EQ(stats->min(), expected_min); - EXPECT_EQ(stats->max(), expected_max); + EXPECT_EQ(stats->EncodeMin(), EncodeValue(expected_min)); + EXPECT_EQ(stats->EncodeMax(), EncodeValue(expected_max)); } template @@ -1268,50 +1320,217 @@ void CheckExtrema() { TEST(TestStatistic, Int32Extrema) { CheckExtrema(); } TEST(TestStatistic, Int64Extrema) { CheckExtrema(); } -// PARQUET-1225: Float NaN values may lead to incorrect min-max -template -void CheckNaNs() { - using T = typename ParquetType::c_type; +template +class TestFloatStatistics : public ::testing::Test { + public: + using ParquetType = typename RebindLogical::ParquetType; + using c_type = typename ParquetType::c_type; + + void Init(); + 
void SetUp() override { this->Init(); } + + bool signbit(c_type val); + void CheckEq(const c_type& l, const c_type& r); + NodePtr MakeNode(const std::string& name, Repetition::type rep); + + template + void CheckMinMaxZeroesSign(Stats stats, const Values& values) { + stats->Update(values.data(), values.size(), 0); + ASSERT_TRUE(stats->HasMinMax()); + + this->CheckEq(stats->min(), positive_zero_); + ASSERT_TRUE(this->signbit(stats->min())); + + this->CheckEq(stats->max(), positive_zero_); + ASSERT_FALSE(this->signbit(stats->max())); + } + + // ARROW-5562: Ensure that -0.0f and 0.0f values are properly handled like in + // parquet-mr + void TestNegativeZeroes() { + NodePtr node = this->MakeNode("f", Repetition::OPTIONAL); + ColumnDescriptor descr(node, 1, 1); + { + std::array values{negative_zero_, positive_zero_}; + auto stats = MakeStatistics(&descr); + CheckMinMaxZeroesSign(stats, values); + } + + { + std::array values{positive_zero_, negative_zero_}; + auto stats = MakeStatistics(&descr); + CheckMinMaxZeroesSign(stats, values); + } + + { + std::array values{negative_zero_, negative_zero_}; + auto stats = MakeStatistics(&descr); + CheckMinMaxZeroesSign(stats, values); + } + + { + std::array values{positive_zero_, positive_zero_}; + auto stats = MakeStatistics(&descr); + CheckMinMaxZeroesSign(stats, values); + } + } + + // PARQUET-1225: Float NaN values may lead to incorrect min-max + template + void CheckNaNs(ColumnDescriptor* descr, const Values& all_nans, const Values& some_nans, + const Values& other_nans, c_type min, c_type max, uint8_t valid_bitmap, + uint8_t valid_bitmap_no_nans) { + auto some_nan_stats = MakeStatistics(descr); + // Ingesting only nans should not yield valid min max + AssertUnsetMinMax(some_nan_stats, all_nans); + // Ingesting a mix of NaNs and non-NaNs should not yield valid min max. 
+ AssertMinMaxAre(some_nan_stats, some_nans, min, max); + // Ingesting only nans after a valid min/max, should have not effect + AssertMinMaxAre(some_nan_stats, all_nans, min, max); + + some_nan_stats = MakeStatistics(descr); + AssertUnsetMinMax(some_nan_stats, all_nans, &valid_bitmap); + // NaNs should not pollute min max when excluded via null bitmap. + AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap_no_nans, min, max); + // Ingesting NaNs with a null bitmap should not change the result. + AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap, min, max); + + // An array that doesn't start with NaN + auto other_stats = MakeStatistics(descr); + AssertMinMaxAre(other_stats, other_nans, min, max); + } + + void TestNaNs(); + + protected: + std::vector data_buf_; + c_type positive_zero_; + c_type negative_zero_; +}; + +template +void TestFloatStatistics::Init() { + positive_zero_ = c_type{}; + negative_zero_ = -positive_zero_; +} +template <> +void TestFloatStatistics::Init() { + positive_zero_ = c_type{float16::positive_zero_ptr()}; + negative_zero_ = c_type{float16::negative_zero_ptr()}; +} + +template +NodePtr TestFloatStatistics::MakeNode(const std::string& name, Repetition::type rep) { + return PrimitiveNode::Make(name, rep, ParquetType::type_num); +} +template <> +NodePtr TestFloatStatistics::MakeNode(const std::string& name, + Repetition::type rep) { + return PrimitiveNode::Make(name, rep, LogicalType::Float16(), + Type::FIXED_LEN_BYTE_ARRAY, 2); +} + +template +void TestFloatStatistics::CheckEq(const c_type& l, const c_type& r) { + ASSERT_EQ(l, r); +} +template <> +void TestFloatStatistics::CheckEq(const c_type& a, const c_type& b) { + auto l = float16::Pack(a); + auto r = float16::Pack(b); + if (float16::is_zero(l) && float16::is_zero(r)) return; + ASSERT_EQ(l, r); +} + +template +bool TestFloatStatistics::signbit(c_type val) { + return std::signbit(val); +} +template <> +bool TestFloatStatistics::signbit(c_type val) { + return 
float16::signbit(float16::Pack(val)); +} + +template +void TestFloatStatistics::TestNaNs() { constexpr int kNumValues = 8; - NodePtr node = PrimitiveNode::Make("f", Repetition::OPTIONAL, ParquetType::type_num); + NodePtr node = this->MakeNode("f", Repetition::OPTIONAL); ColumnDescriptor descr(node, 1, 1); - constexpr T nan = std::numeric_limits::quiet_NaN(); - constexpr T min = -4.0f; - constexpr T max = 3.0f; + constexpr c_type nan = std::numeric_limits::quiet_NaN(); + constexpr c_type min = -4.0f; + constexpr c_type max = 3.0f; + + std::array all_nans{nan, nan, nan, nan, nan, nan, nan, nan}; + std::array some_nans{nan, max, -3.0f, -1.0f, nan, 2.0f, min, nan}; + std::array other_nans{1.5f, max, -3.0f, -1.0f, nan, 2.0f, min, nan}; - std::array all_nans{nan, nan, nan, nan, nan, nan, nan, nan}; - std::array some_nans{nan, max, -3.0f, -1.0f, nan, 2.0f, min, nan}; uint8_t valid_bitmap = 0x7F; // 0b01111111 // NaNs excluded uint8_t valid_bitmap_no_nans = 0x6E; // 0b01101110 - // Test values - auto some_nan_stats = MakeStatistics(&descr); - // Ingesting only nans should not yield valid min max - AssertUnsetMinMax(some_nan_stats, all_nans); - // Ingesting a mix of NaNs and non-NaNs should not yield valid min max. - AssertMinMaxAre(some_nan_stats, some_nans, min, max); - // Ingesting only nans after a valid min/max, should have not effect - AssertMinMaxAre(some_nan_stats, all_nans, min, max); + this->CheckNaNs(&descr, all_nans, some_nans, other_nans, min, max, valid_bitmap, + valid_bitmap_no_nans); +} - some_nan_stats = MakeStatistics(&descr); - AssertUnsetMinMax(some_nan_stats, all_nans, &valid_bitmap); - // NaNs should not pollute min max when excluded via null bitmap. - AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap_no_nans, min, max); - // Ingesting NaNs with a null bitmap should not change the result. 
- AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap, min, max); +template <> +void TestFloatStatistics::TestNaNs() { + constexpr int kNumValues = 8; + constexpr int kValueLen = sizeof(uint16_t); + + NodePtr node = this->MakeNode("f", Repetition::OPTIONAL); + ColumnDescriptor descr(node, 1, 1); + + const uint16_t nan_int = 0b1111110010101010; + const uint16_t min_int = 0b1010010111000110; + const uint16_t max_int = 0b0011100011010011; + uint8_t min_max_data[2 * kValueLen]; + const auto min = FLBA{float16::Unpack(min_int, &min_max_data[0 * kValueLen])}; + const auto max = FLBA{float16::Unpack(max_int, &min_max_data[1 * kValueLen])}; + + std::array all_nans_packed = {nan_int, nan_int, nan_int, nan_int, + nan_int, nan_int, nan_int, nan_int}; + std::array some_nans_packed = {nan_int, + max_int, + 0b1000111000110000, + 0b1000010001000001, + nan_int, + 0b0000100000011110, + min_int, + nan_int}; + std::array other_nans_packed = some_nans_packed; + other_nans_packed[0] = 0b0000010000110011; + + std::array bytes; + uint8_t* at = bytes.data(); + auto prepare_values = [&](const auto& packed_values) -> std::vector { + std::vector out; + for (uint16_t packed : packed_values) { + out.push_back(FLBA{float16::Unpack(packed, at)}); + at += kValueLen; + } + return out; + }; - // An array that doesn't start with NaN - std::array other_nans{1.5f, max, -3.0f, -1.0f, nan, 2.0f, min, nan}; - auto other_stats = MakeStatistics(&descr); - AssertMinMaxAre(other_stats, other_nans, min, max); + auto all_nans = prepare_values(all_nans_packed); + auto some_nans = prepare_values(some_nans_packed); + auto other_nans = prepare_values(other_nans_packed); + + uint8_t valid_bitmap = 0x7F; // 0b01111111 + // NaNs excluded + uint8_t valid_bitmap_no_nans = 0x6E; // 0b01101110 + + this->CheckNaNs(&descr, all_nans, some_nans, other_nans, min, max, valid_bitmap, + valid_bitmap_no_nans); } -TEST(TestStatistic, NaNFloatValues) { CheckNaNs(); } +using FloatingPointTypes = ::testing::Types; + 
+TYPED_TEST_SUITE(TestFloatStatistics, FloatingPointTypes); -TEST(TestStatistic, NaNDoubleValues) { CheckNaNs(); } +TYPED_TEST(TestFloatStatistics, NegativeZeros) { this->TestNegativeZeroes(); } +TYPED_TEST(TestFloatStatistics, NaNs) { this->TestNaNs(); } // ARROW-7376 TEST(TestStatisticsSortOrderFloatNaN, NaNAndNullsInfiniteLoop) { @@ -1327,58 +1546,6 @@ TEST(TestStatisticsSortOrderFloatNaN, NaNAndNullsInfiniteLoop) { AssertUnsetMinMax(stats, nans_but_last, &all_but_last_valid); } -template -void AssertMinMaxZeroesSign(Stats stats, const Array& values) { - stats->Update(values.data(), values.size(), 0); - ASSERT_TRUE(stats->HasMinMax()); - - T zero{}; - ASSERT_EQ(stats->min(), zero); - ASSERT_TRUE(std::signbit(stats->min())); - - ASSERT_EQ(stats->max(), zero); - ASSERT_FALSE(std::signbit(stats->max())); -} - -// ARROW-5562: Ensure that -0.0f and 0.0f values are properly handled like in -// parquet-mr -template -void CheckNegativeZeroStats() { - using T = typename ParquetType::c_type; - - NodePtr node = PrimitiveNode::Make("f", Repetition::OPTIONAL, ParquetType::type_num); - ColumnDescriptor descr(node, 1, 1); - T zero{}; - - { - std::array values{-zero, zero}; - auto stats = MakeStatistics(&descr); - AssertMinMaxZeroesSign(stats, values); - } - - { - std::array values{zero, -zero}; - auto stats = MakeStatistics(&descr); - AssertMinMaxZeroesSign(stats, values); - } - - { - std::array values{-zero, -zero}; - auto stats = MakeStatistics(&descr); - AssertMinMaxZeroesSign(stats, values); - } - - { - std::array values{zero, zero}; - auto stats = MakeStatistics(&descr); - AssertMinMaxZeroesSign(stats, values); - } -} - -TEST(TestStatistics, FloatNegativeZero) { CheckNegativeZeroStats(); } - -TEST(TestStatistics, DoubleNegativeZero) { CheckNegativeZeroStats(); } - // Test statistics for binary column with UNSIGNED sort order TEST(TestStatisticsSortOrderMinMax, Unsigned) { std::string dir_string(test::get_data_dir()); From 046e967d98c9a2ca938a612f96816cad58c02b52 Mon Sep 
17 00:00:00 2001 From: Ben Harkins <60872452+benibus@users.noreply.github.com> Date: Thu, 15 Jun 2023 11:26:14 -0400 Subject: [PATCH 04/37] Apply suggestion from code review Co-authored-by: Antoine Pitrou --- cpp/src/parquet/statistics_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index 4dc77e771bded..d22a0bc681ade 100644 --- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -1335,7 +1335,7 @@ class TestFloatStatistics : public ::testing::Test { template void CheckMinMaxZeroesSign(Stats stats, const Values& values) { - stats->Update(values.data(), values.size(), 0); + stats->Update(values.data(), values.size(), /*null_count=*/0); ASSERT_TRUE(stats->HasMinMax()); this->CheckEq(stats->min(), positive_zero_); From 66efa36f54f0c1982b409c0c1d2a1b1317d2818f Mon Sep 17 00:00:00 2001 From: benibus Date: Fri, 16 Jun 2023 17:19:55 -0400 Subject: [PATCH 05/37] Add Float16 utils to Arrow --- cpp/src/arrow/util/float16.h | 158 +++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 cpp/src/arrow/util/float16.h diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h new file mode 100644 index 0000000000000..fedba0c29dc75 --- /dev/null +++ b/cpp/src/arrow/util/float16.h @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "arrow/util/bit_util.h" +#include "arrow/util/ubsan.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace util { + +/// \brief Base class for an IEEE half-precision float, encoded as a `uint16_t` +/// +/// The exact format is as follows (from MSB to LSB): +/// - bit 0: sign +/// - bits 1-5: exponent +/// - bits 6-15: mantissa +/// +/// NOTE: Methods in the class should not mutate the unerlying value or produce copies. +/// Such functionality is delegated to subclasses. 
+class ARROW_EXPORT Float16Base { + public: + Float16Base() = default; + constexpr explicit Float16Base(uint16_t value) : value_(value) {} + + constexpr uint16_t bits() const { return value_; } + constexpr explicit operator uint16_t() const { return bits(); } + + constexpr bool signbit() const { return (value_ & 0x8000) != 0; } + + constexpr bool is_nan() const { + return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0; + } + constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; } + constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } + + void ToBytes(uint8_t* dest) const { + auto value = bit_util::ToLittleEndian(value_); + std::memcpy(dest, &value, sizeof(value)); + } + std::array ToBytes() const { + std::array bytes; + ToBytes(bytes.data()); + return bytes; + } + + friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return Float16Base::CompareEq(lhs, rhs); + } + friend constexpr bool operator!=(Float16Base lhs, Float16Base rhs) { + return !(lhs == rhs); + } + + friend constexpr bool operator<(Float16Base lhs, Float16Base rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return Float16Base::CompareLt(lhs, rhs); + } + friend constexpr bool operator>(Float16Base lhs, Float16Base rhs) { return rhs < lhs; } + + friend constexpr bool operator<=(Float16Base lhs, Float16Base rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return !Float16Base::CompareLt(rhs, lhs); + } + friend constexpr bool operator>=(Float16Base lhs, Float16Base rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return !Float16Base::CompareLt(lhs, rhs); + } + + friend std::ostream& operator<<(std::ostream& os, Float16Base arg) { + return (os << arg.bits()); + } + + protected: + uint16_t value_; + + private: + // Comparison helpers that assume neither operand is NaN + static constexpr bool CompareEq(Float16Base lhs, Float16Base rhs) { + return (lhs.bits() == rhs.bits()) 
|| (lhs.is_zero() && rhs.is_zero()); + } + static constexpr bool CompareLt(Float16Base lhs, Float16Base rhs) { + if (lhs.signbit()) { + if (rhs.signbit()) { + // Both are negative + return (lhs.bits() & 0x7fff) > (rhs.bits() & 0x7fff); + } else { + // Handle +/-0 + return !lhs.is_zero() || rhs.bits() != 0; + } + } else if (rhs.signbit()) { + return false; + } else { + // Both are positive + return (lhs.bits() & 0x7fff) < (rhs.bits() & 0x7fff); + } + } +}; + +/// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t` +class ARROW_EXPORT Float16 : public Float16Base { + public: + using Float16Base::Float16Base; + + constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } + constexpr Float16 operator+() const { return Float16(value_); } + + static Float16 FromBytes(const uint8_t* src) { + return Float16(bit_util::FromLittleEndian(SafeLoadAs(src))); + } +}; + +static_assert(std::is_trivial_v); + +} // namespace util +} // namespace arrow + +// TODO: Not complete +template <> +class std::numeric_limits { + using T = arrow::util::Float16; + + public: + static constexpr bool is_specialized = true; + static constexpr bool is_signed = true; + static constexpr bool has_infinity = true; + static constexpr bool has_quiet_NaN = true; + + static constexpr T min() { return T(0b0000010000000000); } + static constexpr T max() { return T(0b0111101111111111); } + static constexpr T lowest() { return -max(); } + + static constexpr T infinity() { return T(0b0111110000000000); } + + static constexpr T quiet_NaN() { return T(0b0111111111111111); } +}; From e51d0d157db8768185ac7d1d97aa269253d2db5e Mon Sep 17 00:00:00 2001 From: benibus Date: Fri, 16 Jun 2023 17:27:59 -0400 Subject: [PATCH 06/37] Replace float_internal.h --- cpp/src/parquet/float_internal.h | 61 --------------- cpp/src/parquet/statistics.cc | 72 +++++++++-------- cpp/src/parquet/statistics_test.cc | 120 ++++++++++++++++++----------- 3 files changed, 114 insertions(+), 139 
deletions(-) delete mode 100644 cpp/src/parquet/float_internal.h diff --git a/cpp/src/parquet/float_internal.h b/cpp/src/parquet/float_internal.h deleted file mode 100644 index c82c9d575ce3b..0000000000000 --- a/cpp/src/parquet/float_internal.h +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include -#include - -#include "arrow/util/bit_util.h" -#include "arrow/util/ubsan.h" -#include "parquet/types.h" - -namespace parquet { - -struct float16 { - constexpr static uint16_t min() { return 0b1111101111111111; } - constexpr static uint16_t max() { return 0b0111101111111111; } - constexpr static uint16_t positive_zero() { return 0b0000000000000000; } - constexpr static uint16_t negative_zero() { return 0b1000000000000000; } - - static uint8_t* min_ptr() { return min_; } - static uint8_t* max_ptr() { return max_; } - static uint8_t* positive_zero_ptr() { return positive_zero_; } - static uint8_t* negative_zero_ptr() { return negative_zero_; } - - static bool is_nan(uint16_t n) { return (n & 0x7c00) == 0x7c00 && (n & 0x03ff) != 0; } - static bool is_zero(uint16_t n) { return (n & 0x7fff) == 0; } - static bool signbit(uint16_t n) { return (n & 0x8000) != 0; } - - static uint16_t Pack(const uint8_t* src) { - return ::arrow::bit_util::FromLittleEndian(::arrow::util::SafeLoadAs(src)); - } - static uint16_t Pack(const FLBA& src) { return Pack(src.ptr); } - - static uint8_t* Unpack(uint16_t src, uint8_t* dest) { - src = ::arrow::bit_util::ToLittleEndian(src); - return static_cast(std::memcpy(dest, &src, sizeof(src))); - } - - private: - static inline uint8_t min_[] = {0b11111111, 0b11111011}; - static inline uint8_t max_[] = {0b11111111, 0b01111011}; - static inline uint8_t positive_zero_[] = {0b00000000, 0b00000000}; - static inline uint8_t negative_zero_[] = {0b00000000, 0b10000000}; -}; - -} // namespace parquet diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index a3a486539710e..f15d6664bf220 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -30,18 +30,19 @@ #include "arrow/type_traits.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/float16.h" #include "arrow/util/logging.h" #include "arrow/util/ubsan.h" #include 
"arrow/visit_data_inline.h" #include "parquet/encoding.h" #include "parquet/exception.h" -#include "parquet/float_internal.h" #include "parquet/platform.h" #include "parquet/schema.h" using arrow::default_memory_pool; using arrow::MemoryPool; using arrow::internal::checked_cast; +using arrow::util::Float16; using arrow::util::SafeCopy; using arrow::util::SafeLoad; @@ -54,6 +55,25 @@ namespace { constexpr int value_length(int value_length, const ByteArray& value) { return value.len; } constexpr int value_length(int type_length, const FLBA& value) { return type_length; } +// Static "constants" for normalizing float16 min/max values. These need to be expressed +// as pointers because `Float16LogicalType` represents an FLBA. +const uint8_t* float16_lowest() { + static const auto bytes = std::numeric_limits::lowest().ToBytes(); + return bytes.data(); +} +const uint8_t* float16_max() { + static const auto bytes = std::numeric_limits::max().ToBytes(); + return bytes.data(); +} +const uint8_t* float16_positive_zero() { + static const auto bytes = Float16(0).ToBytes(); + return bytes.data(); +} +const uint8_t* float16_negative_zero() { + static const auto bytes = (-Float16(0)).ToBytes(); + return bytes.data(); +} + template struct CompareHelper { using T = typename DType::c_type; @@ -281,31 +301,18 @@ struct CompareHelper struct Float16CompareHelper { using T = FLBA; - static T DefaultMin() { return T{float16::max_ptr()}; } - static T DefaultMax() { return T{float16::min_ptr()}; } + static T DefaultMin() { return T{float16_max()}; } + static T DefaultMax() { return T{float16_lowest()}; } static T Coalesce(T val, T fallback) { - return val.ptr != nullptr && float16::is_nan(float16::Pack(val)) ? fallback : val; + return val.ptr != nullptr && Float16::FromBytes(val.ptr).is_nan() ? 
fallback : val; } static inline bool Compare(int type_length, const T& a, const T& b) { - uint16_t l = float16::Pack(a); - uint16_t r = float16::Pack(b); - - if (l & 0x8000) { - if (r & 0x8000) { - // Both are negative - return (l & 0x7fff) > (r & 0x7fff); - } else { - // Handle +/-0 - return (l & 0x7fff) || r != 0; - } - } else if (r & 0x8000) { - return false; - } else { - // Both are positive - return (l & 0x7fff) < (r & 0x7fff); - } + const auto lhs = Float16::FromBytes(a.ptr); + const auto rhs = Float16::FromBytes(b.ptr); + // NaN is handled here (same behavior as native float compare) + return lhs < rhs; } static T Min(int type_length, const T& a, const T& b) { @@ -363,27 +370,28 @@ CleanStatistic(std::pair min_max, LogicalType::Type::type) { } optional> CleanFloat16Statistic(std::pair min_max) { - FLBA min = min_max.first; - FLBA max = min_max.second; - uint16_t min_packed = float16::Pack(min); - uint16_t max_packed = float16::Pack(max); + FLBA min_flba = min_max.first; + FLBA max_flba = min_max.second; + Float16 min = Float16::FromBytes(min_flba.ptr); + Float16 max = Float16::FromBytes(max_flba.ptr); - if (float16::is_nan(min_packed) || float16::is_nan(max_packed)) { + if (min.is_nan() || max.is_nan()) { return ::std::nullopt; } - if (min_packed == float16::max() && max_packed == float16::min()) { + if (min == std::numeric_limits::max() && + max == std::numeric_limits::lowest()) { return ::std::nullopt; } - if (min_packed == float16::positive_zero()) { - min = FLBA{float16::negative_zero_ptr()}; + if (min == Float16(0)) { + min_flba = FLBA{float16_negative_zero()}; } - if (max_packed == float16::negative_zero()) { - max = FLBA{float16::positive_zero_ptr()}; + if (max == -Float16(0)) { + max_flba = FLBA{float16_positive_zero()}; } - return {{min, max}}; + return {{min_flba, max_flba}}; } optional> CleanStatistic(std::pair min_max, diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index d22a0bc681ade..24dac6cf2cd08 100644 
--- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -34,13 +34,13 @@ #include "arrow/type_traits.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" +#include "arrow/util/float16.h" #include "arrow/util/ubsan.h" #include "parquet/column_reader.h" #include "parquet/column_writer.h" #include "parquet/file_reader.h" #include "parquet/file_writer.h" -#include "parquet/float_internal.h" #include "parquet/platform.h" #include "parquet/schema.h" #include "parquet/statistics.h" @@ -50,6 +50,7 @@ using arrow::default_memory_pool; using arrow::MemoryPool; +using arrow::util::Float16; using arrow::util::SafeCopy; namespace bit_util = arrow::bit_util; @@ -62,6 +63,28 @@ using schema::PrimitiveNode; namespace test { +class BufferedFloat16 : public ::arrow::util::Float16Base { + public: + explicit BufferedFloat16(Float16 f16) : Float16Base(f16) { + buffer_ = *::arrow::AllocateBuffer(sizeof(value_)); + ToBytes(buffer_->mutable_data()); + } + explicit BufferedFloat16(uint16_t value) : BufferedFloat16(Float16(value)) {} + + const uint8_t* bytes() const { return buffer_->data(); } + const std::shared_ptr<::arrow::Buffer>& buffer() { return buffer_; } + + BufferedFloat16 operator+() const { return *this; } + BufferedFloat16 operator-() const { return BufferedFloat16(value_ ^ 0x8000); } + + static BufferedFloat16 FromBytes(const uint8_t* src) { + return BufferedFloat16(Float16::FromBytes(src)); + } + + private: + std::shared_ptr<::arrow::Buffer> buffer_; +}; + // ---------------------------------------------------------------------- // Test comparators @@ -1129,21 +1152,30 @@ void TestStatisticsSortOrder::SetValues() { constexpr int kValueLen = 2; constexpr int kNumBytes = NUM_VALUES * kValueLen; - const uint16_t packed_vals[NUM_VALUES] = { - 0b0000000000000000, 0b0000000000000000, 0b1000000000000000, 0b1000010000000000, - 0b0111110000001000, 0b1000000000000000, 0b0000010000000000, 0b0000000001000000, - 0b1111110000001000, 
0b1000000001000000}; + const uint16_t u16_vals[NUM_VALUES] = { + 0b1100010100000000, // -5.0 + 0b1100010000000000, // -4.0 + 0b1100001000000000, // -3.0 + 0b1100000000000000, // -2.0 + 0b1011110000000000, // -1.0 + 0b0000000000000000, // +0.0 + 0b0011110000000000, // +1.0 + 0b0100000000000000, // +2.0 + 0b0100001000000000, // +3.0 + 0b0100010000000000, // +4.0 + }; values_buf_.resize(kNumBytes); uint8_t* ptr = values_buf_.data(); for (int i = 0; i < NUM_VALUES; ++i) { - values_[i].ptr = float16::Unpack(packed_vals[i], ptr); + Float16(u16_vals[i]).ToBytes(ptr); + values_[i].ptr = ptr; ptr += kValueLen; } stats_[0] - .set_min(std::string(reinterpret_cast(values_[3].ptr), kValueLen)) - .set_max(std::string(reinterpret_cast(values_[6].ptr), kValueLen)); + .set_min(std::string(reinterpret_cast(values_[0].ptr), kValueLen)) + .set_max(std::string(reinterpret_cast(values_[9].ptr), kValueLen)); } TYPED_TEST_SUITE(TestStatisticsSortOrder, CompareTestTypes); @@ -1416,8 +1448,11 @@ void TestFloatStatistics::Init() { } template <> void TestFloatStatistics::Init() { - positive_zero_ = c_type{float16::positive_zero_ptr()}; - negative_zero_ = c_type{float16::negative_zero_ptr()}; + data_buf_.resize(4); + (+Float16(0)).ToBytes(&data_buf_[0]); + positive_zero_ = FLBA{&data_buf_[0]}; + (-Float16(0)).ToBytes(&data_buf_[2]); + negative_zero_ = FLBA{&data_buf_[2]}; } template @@ -1437,9 +1472,8 @@ void TestFloatStatistics::CheckEq(const c_type& l, const c_type& r) { } template <> void TestFloatStatistics::CheckEq(const c_type& a, const c_type& b) { - auto l = float16::Pack(a); - auto r = float16::Pack(b); - if (float16::is_zero(l) && float16::is_zero(r)) return; + auto l = Float16::FromBytes(a.ptr); + auto r = Float16::FromBytes(b.ptr); ASSERT_EQ(l, r); } @@ -1449,7 +1483,7 @@ bool TestFloatStatistics::signbit(c_type val) { } template <> bool TestFloatStatistics::signbit(c_type val) { - return float16::signbit(float16::Pack(val)); + return Float16::FromBytes(val.ptr).signbit(); } 
template @@ -1477,45 +1511,39 @@ void TestFloatStatistics::TestNaNs() { template <> void TestFloatStatistics::TestNaNs() { constexpr int kNumValues = 8; - constexpr int kValueLen = sizeof(uint16_t); NodePtr node = this->MakeNode("f", Repetition::OPTIONAL); ColumnDescriptor descr(node, 1, 1); - const uint16_t nan_int = 0b1111110010101010; - const uint16_t min_int = 0b1010010111000110; - const uint16_t max_int = 0b0011100011010011; - uint8_t min_max_data[2 * kValueLen]; - const auto min = FLBA{float16::Unpack(min_int, &min_max_data[0 * kValueLen])}; - const auto max = FLBA{float16::Unpack(max_int, &min_max_data[1 * kValueLen])}; - - std::array all_nans_packed = {nan_int, nan_int, nan_int, nan_int, - nan_int, nan_int, nan_int, nan_int}; - std::array some_nans_packed = {nan_int, - max_int, - 0b1000111000110000, - 0b1000010001000001, - nan_int, - 0b0000100000011110, - min_int, - nan_int}; - std::array other_nans_packed = some_nans_packed; - other_nans_packed[0] = 0b0000010000110011; - - std::array bytes; - uint8_t* at = bytes.data(); - auto prepare_values = [&](const auto& packed_values) -> std::vector { - std::vector out; - for (uint16_t packed : packed_values) { - out.push_back(FLBA{float16::Unpack(packed, at)}); - at += kValueLen; - } + using F16 = BufferedFloat16; + const auto nan_f16 = F16(std::numeric_limits::quiet_NaN()); + const auto min_f16 = F16(0xc400); // -4.0 + const auto max_f16 = F16(0x4200); // +3.0 + + const auto min = FLBA{min_f16.bytes()}; + const auto max = FLBA{max_f16.bytes()}; + + std::array all_nans_f16 = {nan_f16, nan_f16, nan_f16, nan_f16, + nan_f16, nan_f16, nan_f16, nan_f16}; + std::array some_nans_f16 = {nan_f16, max_f16, + F16(0xc200), // -3.0 + F16(0xbc00), // -1.0 + nan_f16, + F16(0x4000), // +2.0 + min_f16, nan_f16}; + std::array other_nans_f16 = some_nans_f16; + other_nans_f16[0] = F16(0x3e00); // +1.5 + + auto prepare_values = [](const auto& values) -> std::vector { + std::vector out(values.size()); + std::transform(values.begin(), 
values.end(), out.begin(), + [](const F16& f16) { return FLBA{f16.bytes()}; }); return out; }; - auto all_nans = prepare_values(all_nans_packed); - auto some_nans = prepare_values(some_nans_packed); - auto other_nans = prepare_values(other_nans_packed); + auto all_nans = prepare_values(all_nans_f16); + auto some_nans = prepare_values(some_nans_f16); + auto other_nans = prepare_values(other_nans_f16); uint8_t valid_bitmap = 0x7F; // 0b01111111 // NaNs excluded From a2f72acbee57c20ecf584739f8c99b8d90bdbfcd Mon Sep 17 00:00:00 2001 From: benibus Date: Fri, 16 Jun 2023 17:51:46 -0400 Subject: [PATCH 07/37] Minor test tweaks --- cpp/src/parquet/statistics_test.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index 24dac6cf2cd08..ea285ab99179d 100644 --- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -1359,7 +1359,10 @@ class TestFloatStatistics : public ::testing::Test { using c_type = typename ParquetType::c_type; void Init(); - void SetUp() override { this->Init(); } + void SetUp() override { + this->Init(); + ASSERT_NE(EncodeValue(negative_zero_), EncodeValue(positive_zero_)); + } bool signbit(c_type val); void CheckEq(const c_type& l, const c_type& r); @@ -1372,9 +1375,11 @@ class TestFloatStatistics : public ::testing::Test { this->CheckEq(stats->min(), positive_zero_); ASSERT_TRUE(this->signbit(stats->min())); + ASSERT_EQ(stats->EncodeMin(), EncodeValue(negative_zero_)); this->CheckEq(stats->max(), positive_zero_); ASSERT_FALSE(this->signbit(stats->max())); + ASSERT_EQ(stats->EncodeMax(), EncodeValue(positive_zero_)); } // ARROW-5562: Ensure that -0.0f and 0.0f values are properly handled like in @@ -1416,9 +1421,9 @@ class TestFloatStatistics : public ::testing::Test { auto some_nan_stats = MakeStatistics(descr); // Ingesting only nans should not yield valid min max AssertUnsetMinMax(some_nan_stats, all_nans); - // Ingesting a 
mix of NaNs and non-NaNs should not yield valid min max. + // Ingesting a mix of NaNs and non-NaNs should yield a valid min max. AssertMinMaxAre(some_nan_stats, some_nans, min, max); - // Ingesting only nans after a valid min/max, should have not effect + // Ingesting only nans after a valid min/max, should have no effect AssertMinMaxAre(some_nan_stats, all_nans, min, max); some_nan_stats = MakeStatistics(descr); From 1163b4e7708a48030e3057056df3b255815f68b4 Mon Sep 17 00:00:00 2001 From: benibus Date: Sat, 17 Jun 2023 20:40:21 -0400 Subject: [PATCH 08/37] Add tests for Float16 operators --- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/float16_test.cc | 135 +++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 cpp/src/arrow/util/float16_test.cc diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 3dc8eac1abf64..2e9487dcf50c8 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -48,6 +48,7 @@ add_arrow_test(utility-test checked_cast_test.cc compression_test.cc decimal_test.cc + float16_test.cc formatting_util_test.cc key_value_metadata_test.cc hashing_test.cc diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc new file mode 100644 index 0000000000000..75ee9dc816b97 --- /dev/null +++ b/cpp/src/arrow/util/float16_test.cc @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include + +#include "arrow/testing/gtest_util.h" +#include "arrow/util/float16.h" + +namespace arrow { +namespace util { +namespace { + +template +using Limits = std::numeric_limits; + +// Holds a float16 and its equivalent float32 +struct TestValue { + TestValue(Float16 f16, float f32) : f16(f16), f32(f32) {} + TestValue(uint16_t u16, float f32) : TestValue(Float16(u16), f32) {} + + Float16 f16; + float f32; +}; + +#define GENERATE_OPERATOR(NAME, OP) \ + struct NAME { \ + std::pair operator()(TestValue l, TestValue r) { \ + return std::make_pair((l.f32 OP r.f32), (l.f16 OP r.f16)); \ + } \ + } + +GENERATE_OPERATOR(CompareEq, ==); +GENERATE_OPERATOR(CompareNe, !=); +GENERATE_OPERATOR(CompareLt, <); +GENERATE_OPERATOR(CompareGt, >); +GENERATE_OPERATOR(CompareLe, <=); +GENERATE_OPERATOR(CompareGe, >=); + +#undef GENERATE_OPERATOR + +const std::vector g_test_values = { + TestValue(Limits::min(), +0.00006104f), + TestValue(Limits::max(), +65504.0f), + TestValue(Limits::lowest(), -65504.0f), + TestValue(+Limits::infinity(), +Limits::infinity()), + TestValue(-Limits::infinity(), -Limits::infinity()), + // Multiple (semantically equivalent) NaN representations + TestValue(0x7fff, Limits::quiet_NaN()), + TestValue(0xffff, Limits::quiet_NaN()), + TestValue(0x7e00, Limits::quiet_NaN()), + TestValue(0xfe00, Limits::quiet_NaN()), + // Positive/negative zeroes + TestValue(0x0000, +0.0f), + TestValue(0x8000, -0.0f), + // Miscellaneous values. 
In general, they're chosen to test the sign/exponent and + // exponent/mantissa boundaries + TestValue(0x101c, +0.000502f), + TestValue(0x901c, -0.000502f), + TestValue(0x101d, +0.0005022f), + TestValue(0x901d, -0.0005022f), + TestValue(0x121c, +0.000746f), + TestValue(0x921c, -0.000746f), + TestValue(0x141c, +0.001004f), + TestValue(0x941c, -0.001004f), + TestValue(0x501c, +32.9f), + TestValue(0xd01c, -32.9f), + // A few subnormals for good measure + TestValue(0x001c, +0.0000017f), + TestValue(0x801c, -0.0000017f), + TestValue(0x021c, +0.0000332f), + TestValue(0x821c, -0.0000332f), +}; + +template +class Float16OperatorTest : public ::testing::Test { + public: + void TestCompare(const std::vector& test_values) { + // Check all combinations of operands in both directions + for (size_t i = 0; i < test_values.size(); ++i) { + this->TestCompare(test_values, static_cast(i)); + } + } + + void TestCompare(const std::vector& test_values, int offset) { + const auto num_values = static_cast(test_values.size()); + ASSERT_TRUE(offset >= 0 && offset < num_values); + + int i = 0; + int j = offset; + while (i < num_values) { + ARROW_SCOPED_TRACE(i, ",", j); + + auto a = test_values[i]; + auto b = test_values[j]; + std::pair ret; + + // Results for float16 and float32 should be the same + ret = Operator{}(a, b); + ASSERT_EQ(ret.first, ret.second); + ret = Operator{}(b, a); + ASSERT_EQ(ret.first, ret.second); + + ++i; + j = (j + 1) % num_values; + } + } +}; + +using OperatorTypes = + ::testing::Types; + +TYPED_TEST_SUITE(Float16OperatorTest, OperatorTypes); + +TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); } + +} // namespace +} // namespace util +} // namespace arrow From bc640ff62a5ee5c9079c9a3f233aabd02e1860b1 Mon Sep 17 00:00:00 2001 From: benibus Date: Sat, 17 Jun 2023 21:55:15 -0400 Subject: [PATCH 09/37] Support multiple endians in Float16 class --- cpp/src/arrow/util/float16.h | 37 ++++++++++++++++++++++++++---- 
cpp/src/arrow/util/float16_test.cc | 33 ++++++++++++++++++++++++++ cpp/src/parquet/statistics.cc | 19 +++++++-------- cpp/src/parquet/statistics_test.cc | 18 ++++++--------- 4 files changed, 82 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index fedba0c29dc75..f2db88f3d3ea2 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -40,7 +40,7 @@ namespace util { /// /// NOTE: Methods in the class should not mutate the unerlying value or produce copies. /// Such functionality is delegated to subclasses. -class ARROW_EXPORT Float16Base { +class Float16Base { public: Float16Base() = default; constexpr explicit Float16Base(uint16_t value) : value_(value) {} @@ -56,13 +56,32 @@ class ARROW_EXPORT Float16Base { constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; } constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } - void ToBytes(uint8_t* dest) const { + /// \brief Copy the value's bytes in native-endian byte order + void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); } + /// \brief Return the value's bytes in native-endian byte order + std::array ToBytes() const { + std::array bytes; + ToBytes(bytes.data()); + return bytes; + } + + void ToLittleEndian(uint8_t* dest) const { auto value = bit_util::ToLittleEndian(value_); std::memcpy(dest, &value, sizeof(value)); } - std::array ToBytes() const { + std::array ToLittleEndian() const { std::array bytes; - ToBytes(bytes.data()); + ToLittleEndian(bytes.data()); + return bytes; + } + + void ToBigEndian(uint8_t* dest) const { + auto value = bit_util::ToBigEndian(value_); + std::memcpy(dest, &value, sizeof(value)); + } + std::array ToBigEndian() const { + std::array bytes; + ToBigEndian(bytes.data()); return bytes; } @@ -120,16 +139,24 @@ class ARROW_EXPORT Float16Base { }; /// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t` -class ARROW_EXPORT Float16 : 
public Float16Base { +class Float16 : public Float16Base { public: using Float16Base::Float16Base; constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } constexpr Float16 operator+() const { return Float16(value_); } + /// \brief Read a `Float16` from memory in native-endian byte order static Float16 FromBytes(const uint8_t* src) { + return Float16(SafeLoadAs(src)); + } + + static Float16 FromLittleEndian(const uint8_t* src) { return Float16(bit_util::FromLittleEndian(SafeLoadAs(src))); } + static Float16 FromBigEndian(const uint8_t* src) { + return Float16(bit_util::FromBigEndian(SafeLoadAs(src))); + } }; static_assert(std::is_trivial_v); diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index 75ee9dc816b97..4e6bc64d5b6a6 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -15,13 +15,16 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include #include "arrow/testing/gtest_util.h" +#include "arrow/util/endian.h" #include "arrow/util/float16.h" +#include "arrow/util/ubsan.h" namespace arrow { namespace util { @@ -130,6 +133,36 @@ TYPED_TEST_SUITE(Float16OperatorTest, OperatorTypes); TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); } +TEST(Float16Test, ToBytes) { + constexpr auto f16 = Float16(0xd01c); + auto bytes = f16.ToBytes(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); +#if ARROW_LITTLE_ENDIAN + bytes = f16.ToLittleEndian(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); + bytes = f16.ToBigEndian(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0x1cd0); +#else + bytes = f16.ToLittleEndian(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0x1cd0); + bytes = f16.ToBigEndian(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); +#endif +} + +TEST(Float16Test, FromBytes) { + constexpr uint16_t u16 = 0xd01c; + const auto* data = reinterpret_cast(&u16); + ASSERT_EQ(Float16::FromBytes(data), 
Float16(0xd01c)); +#if ARROW_LITTLE_ENDIAN + ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0xd01c)); + ASSERT_EQ(Float16::FromBigEndian(data), Float16(0x1cd0)); +#else + ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0x1cd0)); + ASSERT_EQ(Float16::FromBigEndian(data), Float16(0xd01c)); +#endif +} + } // namespace } // namespace util } // namespace arrow diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index f15d6664bf220..2af592bc011d2 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -58,19 +58,19 @@ constexpr int value_length(int type_length, const FLBA& value) { return type_len // Static "constants" for normalizing float16 min/max values. These need to be expressed // as pointers because `Float16LogicalType` represents an FLBA. const uint8_t* float16_lowest() { - static const auto bytes = std::numeric_limits::lowest().ToBytes(); + static const auto bytes = std::numeric_limits::lowest().ToLittleEndian(); return bytes.data(); } const uint8_t* float16_max() { - static const auto bytes = std::numeric_limits::max().ToBytes(); + static const auto bytes = std::numeric_limits::max().ToLittleEndian(); return bytes.data(); } const uint8_t* float16_positive_zero() { - static const auto bytes = Float16(0).ToBytes(); + static const auto bytes = Float16(0).ToLittleEndian(); return bytes.data(); } const uint8_t* float16_negative_zero() { - static const auto bytes = (-Float16(0)).ToBytes(); + static const auto bytes = (-Float16(0)).ToLittleEndian(); return bytes.data(); } @@ -305,12 +305,13 @@ struct Float16CompareHelper { static T DefaultMax() { return T{float16_lowest()}; } static T Coalesce(T val, T fallback) { - return val.ptr != nullptr && Float16::FromBytes(val.ptr).is_nan() ? fallback : val; + return val.ptr != nullptr && Float16::FromLittleEndian(val.ptr).is_nan() ? 
fallback + : val; } static inline bool Compare(int type_length, const T& a, const T& b) { - const auto lhs = Float16::FromBytes(a.ptr); - const auto rhs = Float16::FromBytes(b.ptr); + const auto lhs = Float16::FromLittleEndian(a.ptr); + const auto rhs = Float16::FromLittleEndian(b.ptr); // NaN is handled here (same behavior as native float compare) return lhs < rhs; } @@ -372,8 +373,8 @@ CleanStatistic(std::pair min_max, LogicalType::Type::type) { optional> CleanFloat16Statistic(std::pair min_max) { FLBA min_flba = min_max.first; FLBA max_flba = min_max.second; - Float16 min = Float16::FromBytes(min_flba.ptr); - Float16 max = Float16::FromBytes(max_flba.ptr); + Float16 min = Float16::FromLittleEndian(min_flba.ptr); + Float16 max = Float16::FromLittleEndian(max_flba.ptr); if (min.is_nan() || max.is_nan()) { return ::std::nullopt; diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index ea285ab99179d..7de4e3f3840bf 100644 --- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -67,7 +67,7 @@ class BufferedFloat16 : public ::arrow::util::Float16Base { public: explicit BufferedFloat16(Float16 f16) : Float16Base(f16) { buffer_ = *::arrow::AllocateBuffer(sizeof(value_)); - ToBytes(buffer_->mutable_data()); + ToLittleEndian(buffer_->mutable_data()); } explicit BufferedFloat16(uint16_t value) : BufferedFloat16(Float16(value)) {} @@ -77,10 +77,6 @@ class BufferedFloat16 : public ::arrow::util::Float16Base { BufferedFloat16 operator+() const { return *this; } BufferedFloat16 operator-() const { return BufferedFloat16(value_ ^ 0x8000); } - static BufferedFloat16 FromBytes(const uint8_t* src) { - return BufferedFloat16(Float16::FromBytes(src)); - } - private: std::shared_ptr<::arrow::Buffer> buffer_; }; @@ -1168,7 +1164,7 @@ void TestStatisticsSortOrder::SetValues() { values_buf_.resize(kNumBytes); uint8_t* ptr = values_buf_.data(); for (int i = 0; i < NUM_VALUES; ++i) { - Float16(u16_vals[i]).ToBytes(ptr); + 
Float16(u16_vals[i]).ToLittleEndian(ptr); values_[i].ptr = ptr; ptr += kValueLen; } @@ -1454,9 +1450,9 @@ void TestFloatStatistics::Init() { template <> void TestFloatStatistics::Init() { data_buf_.resize(4); - (+Float16(0)).ToBytes(&data_buf_[0]); + (+Float16(0)).ToLittleEndian(&data_buf_[0]); positive_zero_ = FLBA{&data_buf_[0]}; - (-Float16(0)).ToBytes(&data_buf_[2]); + (-Float16(0)).ToLittleEndian(&data_buf_[2]); negative_zero_ = FLBA{&data_buf_[2]}; } @@ -1477,8 +1473,8 @@ void TestFloatStatistics::CheckEq(const c_type& l, const c_type& r) { } template <> void TestFloatStatistics::CheckEq(const c_type& a, const c_type& b) { - auto l = Float16::FromBytes(a.ptr); - auto r = Float16::FromBytes(b.ptr); + auto l = Float16::FromLittleEndian(a.ptr); + auto r = Float16::FromLittleEndian(b.ptr); ASSERT_EQ(l, r); } @@ -1488,7 +1484,7 @@ bool TestFloatStatistics::signbit(c_type val) { } template <> bool TestFloatStatistics::signbit(c_type val) { - return Float16::FromBytes(val.ptr).signbit(); + return Float16::FromLittleEndian(val.ptr).signbit(); } template From 2d7e65fa932b7db1426bb9f8bce90e063d4dcda3 Mon Sep 17 00:00:00 2001 From: benibus Date: Sat, 17 Jun 2023 23:21:51 -0400 Subject: [PATCH 10/37] Small refactor --- cpp/src/arrow/util/float16.h | 14 +++------- cpp/src/arrow/util/float16_test.cc | 42 ++++++++++++------------------ 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index f2db88f3d3ea2..7959a29aa7900 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -66,23 +66,17 @@ class Float16Base { } void ToLittleEndian(uint8_t* dest) const { - auto value = bit_util::ToLittleEndian(value_); - std::memcpy(dest, &value, sizeof(value)); + Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(dest); } std::array ToLittleEndian() const { - std::array bytes; - ToLittleEndian(bytes.data()); - return bytes; + return Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(); } 
void ToBigEndian(uint8_t* dest) const { - auto value = bit_util::ToBigEndian(value_); - std::memcpy(dest, &value, sizeof(value)); + Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(dest); } std::array ToBigEndian() const { - std::array bytes; - ToBigEndian(bytes.data()); - return bytes; + return Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(); } friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) { diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index 4e6bc64d5b6a6..446d89c30a788 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -95,33 +95,25 @@ template class Float16OperatorTest : public ::testing::Test { public: void TestCompare(const std::vector& test_values) { - // Check all combinations of operands in both directions - for (size_t i = 0; i < test_values.size(); ++i) { - this->TestCompare(test_values, static_cast(i)); - } - } - - void TestCompare(const std::vector& test_values, int offset) { const auto num_values = static_cast(test_values.size()); - ASSERT_TRUE(offset >= 0 && offset < num_values); - int i = 0; - int j = offset; - while (i < num_values) { - ARROW_SCOPED_TRACE(i, ",", j); - - auto a = test_values[i]; - auto b = test_values[j]; - std::pair ret; - - // Results for float16 and float32 should be the same - ret = Operator{}(a, b); - ASSERT_EQ(ret.first, ret.second); - ret = Operator{}(b, a); - ASSERT_EQ(ret.first, ret.second); - - ++i; - j = (j + 1) % num_values; + // Check all combinations of operands in both directions + for (int offset = 0; offset < num_values; ++offset) { + int i = 0; + int j = offset; + while (i < num_values) { + ARROW_SCOPED_TRACE(i, ",", j); + + auto a = test_values[i]; + auto b = test_values[j]; + + // Results for float16 and float32 should be the same + auto ret = Operator{}(a, b); + ASSERT_EQ(ret.first, ret.second); + + ++i; + j = (j + 1) % num_values; + } } } }; From 5e925ac62c5488c1b8acde78abd764c627babd1f Mon Sep 17 
00:00:00 2001 From: benibus Date: Wed, 21 Jun 2023 14:23:29 -0400 Subject: [PATCH 11/37] Address more review points --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/util/float16.cc | 28 +++++++++++ cpp/src/arrow/util/float16.h | 53 ++++++++++++++------- cpp/src/arrow/util/float16_test.cc | 40 +++++++++------- cpp/src/parquet/statistics.cc | 74 ++++++++++++++++-------------- 5 files changed, 128 insertions(+), 68 deletions(-) create mode 100644 cpp/src/arrow/util/float16.cc diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 101b089ba837f..24e8eefad1523 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -223,6 +223,7 @@ set(ARROW_SRCS util/debug.cc util/decimal.cc util/delimiting.cc + util/float16.cc util/formatting.cc util/future.cc util/hashing.cc diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc new file mode 100644 index 0000000000000..825cbf0cb1fa3 --- /dev/null +++ b/cpp/src/arrow/util/float16.cc @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "arrow/util/float16.h" + +namespace arrow { +namespace util { + +std::ostream& operator<<(std::ostream& os, Float16Base arg) { return (os << arg.bits()); } + +} // namespace util +} // namespace arrow diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index 7959a29aa7900..74308a09a3cfd 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -18,13 +18,13 @@ #pragma once #include -#include #include #include +#include #include #include -#include "arrow/util/bit_util.h" +#include "arrow/util/endian.h" #include "arrow/util/ubsan.h" #include "arrow/util/visibility.h" @@ -45,38 +45,57 @@ class Float16Base { Float16Base() = default; constexpr explicit Float16Base(uint16_t value) : value_(value) {} + /// \brief Return the value's integer representation constexpr uint16_t bits() const { return value_; } constexpr explicit operator uint16_t() const { return bits(); } + /// \brief Return true if the value is negative (sign bit is set) constexpr bool signbit() const { return (value_ & 0x8000) != 0; } + /// \brief Return true if the value is NaN constexpr bool is_nan() const { return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0; } + /// \brief Return true if the value is positive/negative infinity constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; } + /// \brief Return true if the value is positive/negative zero constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } /// \brief Copy the value's bytes in native-endian byte order void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); } /// \brief Return the value's bytes in native-endian byte order - std::array ToBytes() const { - std::array bytes; - ToBytes(bytes.data()); - return bytes; + constexpr std::array ToBytes() const { +#if ARROW_LITTLE_ENDIAN + return ToLittleEndian(); +#else + return ToBigEndian(); +#endif } + /// \brief Copy the value's bytes in little-endian byte order void 
ToLittleEndian(uint8_t* dest) const { Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(dest); } - std::array ToLittleEndian() const { - return Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(); + /// \brief Return the value's bytes in little-endian byte order + constexpr std::array ToLittleEndian() const { +#if ARROW_LITTLE_ENDIAN + return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)}; +#else + return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)}; +#endif } + /// \brief Copy the value's bytes in big-endian byte order void ToBigEndian(uint8_t* dest) const { Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(dest); } - std::array ToBigEndian() const { - return Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(); + /// \brief Return the value's bytes in big-endian byte order + constexpr std::array ToBigEndian() const { +#if ARROW_LITTLE_ENDIAN + return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)}; +#else + return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)}; +#endif } friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) { @@ -98,13 +117,10 @@ class Float16Base { return !Float16Base::CompareLt(rhs, lhs); } friend constexpr bool operator>=(Float16Base lhs, Float16Base rhs) { - if (lhs.is_nan() || rhs.is_nan()) return false; - return !Float16Base::CompareLt(lhs, rhs); + return rhs <= lhs; } - friend std::ostream& operator<<(std::ostream& os, Float16Base arg) { - return (os << arg.bits()); - } + ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16Base arg); protected: uint16_t value_; @@ -118,7 +134,7 @@ class Float16Base { if (lhs.signbit()) { if (rhs.signbit()) { // Both are negative - return (lhs.bits() & 0x7fff) > (rhs.bits() & 0x7fff); + return lhs.bits() > rhs.bits(); } else { // Handle +/-0 return !lhs.is_zero() || rhs.bits() != 0; @@ -127,7 +143,7 @@ class Float16Base { return false; } else { // Both are positive - return (lhs.bits() & 0x7fff) < (rhs.bits() & 0x7fff); + return lhs.bits() < rhs.bits(); } } }; 
@@ -145,9 +161,12 @@ class Float16 : public Float16Base { return Float16(SafeLoadAs(src)); } + /// \brief Read a `Float16` from memory in little-endian byte order static Float16 FromLittleEndian(const uint8_t* src) { return Float16(bit_util::FromLittleEndian(SafeLoadAs(src))); } + + /// \brief Read a `Float16` from memory in big-endian byte order static Float16 FromBigEndian(const uint8_t* src) { return Float16(bit_util::FromBigEndian(SafeLoadAs(src))); } diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index 446d89c30a788..1ccb9db7b0e25 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -98,10 +98,8 @@ class Float16OperatorTest : public ::testing::Test { const auto num_values = static_cast(test_values.size()); // Check all combinations of operands in both directions - for (int offset = 0; offset < num_values; ++offset) { - int i = 0; - int j = offset; - while (i < num_values) { + for (int i = 0; i < num_values; ++i) { + for (int j = 0; j < num_values; ++j) { ARROW_SCOPED_TRACE(i, ",", j); auto a = test_values[i]; @@ -110,9 +108,6 @@ class Float16OperatorTest : public ::testing::Test { // Results for float16 and float32 should be the same auto ret = Operator{}(a, b); ASSERT_EQ(ret.first, ret.second); - - ++i; - j = (j + 1) % num_values; } } } @@ -127,19 +122,32 @@ TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); } TEST(Float16Test, ToBytes) { constexpr auto f16 = Float16(0xd01c); - auto bytes = f16.ToBytes(); - ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); + std::array bytes; + auto load = [&bytes]() { return SafeLoadAs(bytes.data()); }; + + // Test native-endian + f16.ToBytes(bytes.data()); + ASSERT_EQ(load(), 0xd01c); + bytes = f16.ToBytes(); + ASSERT_EQ(load(), 0xd01c); + #if ARROW_LITTLE_ENDIAN - bytes = f16.ToLittleEndian(); - ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); - bytes = f16.ToBigEndian(); - ASSERT_EQ(SafeLoadAs(bytes.data()), 0x1cd0); + constexpr 
uint16_t expected_le = 0xd01c; + constexpr uint16_t expected_be = 0x1cd0; #else + constexpr uint16_t expected_le = 0x1cd0; + constexpr uint16_t expected_be = 0xd01c; +#endif + // Test little-endian + f16.ToLittleEndian(bytes.data()); + ASSERT_EQ(load(), expected_le); bytes = f16.ToLittleEndian(); - ASSERT_EQ(SafeLoadAs(bytes.data()), 0x1cd0); + ASSERT_EQ(load(), expected_le); + // Test big-endian + f16.ToBigEndian(bytes.data()); + ASSERT_EQ(load(), expected_be); bytes = f16.ToBigEndian(); - ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); -#endif + ASSERT_EQ(load(), expected_be); } TEST(Float16Test, FromBytes) { diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 2af592bc011d2..a7691dd568796 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -57,22 +57,20 @@ constexpr int value_length(int type_length, const FLBA& value) { return type_len // Static "constants" for normalizing float16 min/max values. These need to be expressed // as pointers because `Float16LogicalType` represents an FLBA. 
-const uint8_t* float16_lowest() { - static const auto bytes = std::numeric_limits::lowest().ToLittleEndian(); - return bytes.data(); -} -const uint8_t* float16_max() { - static const auto bytes = std::numeric_limits::max().ToLittleEndian(); - return bytes.data(); -} -const uint8_t* float16_positive_zero() { - static const auto bytes = Float16(0).ToLittleEndian(); - return bytes.data(); -} -const uint8_t* float16_negative_zero() { - static const auto bytes = (-Float16(0)).ToLittleEndian(); - return bytes.data(); -} +struct Float16Constants { + static constexpr const uint8_t* lowest() { return lowest_.data(); } + static constexpr const uint8_t* max() { return max_.data(); } + static constexpr const uint8_t* positive_zero() { return positive_zero_.data(); } + static constexpr const uint8_t* negative_zero() { return negative_zero_.data(); } + + private: + using Bytes = std::array; + static constexpr Bytes lowest_ = + std::numeric_limits::lowest().ToLittleEndian(); + static constexpr Bytes max_ = std::numeric_limits::max().ToLittleEndian(); + static constexpr Bytes positive_zero_ = (+Float16(0)).ToLittleEndian(); + static constexpr Bytes negative_zero_ = (-Float16(0)).ToLittleEndian(); +}; template struct CompareHelper { @@ -301,12 +299,12 @@ struct CompareHelper struct Float16CompareHelper { using T = FLBA; - static T DefaultMin() { return T{float16_max()}; } - static T DefaultMax() { return T{float16_lowest()}; } + static T DefaultMin() { return T{Float16Constants::max()}; } + static T DefaultMax() { return T{Float16Constants::lowest()}; } static T Coalesce(T val, T fallback) { - return val.ptr != nullptr && Float16::FromLittleEndian(val.ptr).is_nan() ? fallback - : val; + return (val.ptr == nullptr || Float16::FromLittleEndian(val.ptr).is_nan()) ? 
fallback + : val; } static inline bool Compare(int type_length, const T& a, const T& b) { @@ -386,10 +384,10 @@ optional> CleanFloat16Statistic(std::pair min_ } if (min == Float16(0)) { - min_flba = FLBA{float16_negative_zero()}; + min_flba = FLBA{Float16Constants::negative_zero()}; } if (max == -Float16(0)) { - max_flba = FLBA{float16_positive_zero()}; + max_flba = FLBA{Float16Constants::positive_zero()}; } return {{min_flba, max_flba}}; @@ -540,13 +538,13 @@ std::pair TypedComparatorImpl::GetMi return GetMinMaxBinaryHelper(*this, values); } -static LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) { +LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) { if (const auto& logical_type = descr->logical_type()) { return logical_type->type(); } return LogicalType::Type::NONE; } -static LogicalType::Type::type LogicalTypeId(const Statistics& stats) { +LogicalType::Type::type LogicalTypeId(const Statistics& stats) { return LogicalTypeId(stats.descr()); } @@ -618,20 +616,26 @@ class TypedStatisticsImpl : public TypedStatistics { void IncrementNumValues(int64_t n) override { num_values_ += n; } + static bool IsMeaningfulLogicalType(LogicalType::Type::type type) { + switch (type) { + case LogicalType::Type::FLOAT16: + return true; + default: + return false; + } + } + bool Equals(const Statistics& raw_other) const override { if (physical_type() != raw_other.physical_type()) return false; const auto logical_id = LogicalTypeId(*this); - switch (logical_id) { - // Only compare against logical types that influence the interpretation of the - // physical type - case LogicalType::Type::FLOAT16: - if (LogicalTypeId(raw_other) != logical_id) { - return false; - } - break; - default: - break; + const auto other_logical_id = LogicalTypeId(raw_other); + // Only compare against logical types that influence the interpretation of the + // physical type + if (IsMeaningfulLogicalType(logical_id)) { + if (logical_id != other_logical_id) return false; + } 
else if (IsMeaningfulLogicalType(other_logical_id)) { + return false; } const auto& other = checked_cast(raw_other); @@ -922,7 +926,7 @@ std::shared_ptr DoMakeComparator(Type::type physical_type, case Type::FIXED_LEN_BYTE_ARRAY: if (logical_type == LogicalType::Type::FLOAT16) { return std::make_shared< - TypedComparatorImpl>(); + TypedComparatorImpl>(type_length); } return std::make_shared>(type_length); default: From 87d121c264db07a807bfe5b05b6efb9895552d42 Mon Sep 17 00:00:00 2001 From: benibus Date: Sun, 9 Jul 2023 18:13:56 -0400 Subject: [PATCH 12/37] Support reading/writing `arrow::HalfFloat` --- .../parquet/arrow/arrow_reader_writer_test.cc | 33 +++++--- cpp/src/parquet/arrow/arrow_schema_test.cc | 18 +---- cpp/src/parquet/arrow/reader_internal.cc | 80 +++++++++++++++++++ cpp/src/parquet/arrow/schema.cc | 5 ++ cpp/src/parquet/arrow/schema_internal.cc | 2 + cpp/src/parquet/arrow/test_util.h | 13 ++- cpp/src/parquet/column_writer.cc | 71 ++++++++++++++++ 7 files changed, 195 insertions(+), 27 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 4e23d0fab5c69..8f1c64b81322b 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -143,6 +143,8 @@ std::shared_ptr get_logical_type(const DataType& type) { return LogicalType::Date(); case ArrowId::DATE64: return LogicalType::Date(); + case ArrowId::HALF_FLOAT: + return LogicalType::Float16(); case ArrowId::TIMESTAMP: { const auto& ts_type = static_cast(type); const bool adjusted_to_utc = !(ts_type.timezone().empty()); @@ -220,6 +222,7 @@ ParquetType::type get_physical_type(const DataType& type) { case ArrowId::FIXED_SIZE_BINARY: case ArrowId::DECIMAL128: case ArrowId::DECIMAL256: + case ArrowId::HALF_FLOAT: return ParquetType::FIXED_LEN_BYTE_ARRAY; case ArrowId::DATE32: return ParquetType::INT32; @@ -525,6 +528,9 @@ static std::shared_ptr MakeSimpleSchema(const DataType& 
type, byte_width = static_cast(values_type).byte_width(); break; + case ::arrow::Type::HALF_FLOAT: + byte_width = sizeof(::arrow::HalfFloatType::c_type); + break; case ::arrow::Type::DECIMAL128: case ::arrow::Type::DECIMAL256: { const auto& decimal_type = static_cast(values_type); @@ -537,6 +543,9 @@ static std::shared_ptr MakeSimpleSchema(const DataType& type, case ::arrow::Type::FIXED_SIZE_BINARY: byte_width = static_cast(type).byte_width(); break; + case ::arrow::Type::HALF_FLOAT: + byte_width = sizeof(::arrow::HalfFloatType::c_type); + break; case ::arrow::Type::DECIMAL128: case ::arrow::Type::DECIMAL256: { const auto& decimal_type = static_cast(type); @@ -840,12 +849,12 @@ typedef ::testing::Types< ::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type, ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type, ::arrow::Int64Type, ::arrow::Date32Type, ::arrow::FloatType, ::arrow::DoubleType, ::arrow::StringType, - ::arrow::BinaryType, ::arrow::FixedSizeBinaryType, DecimalWithPrecisionAndScale<1>, - DecimalWithPrecisionAndScale<5>, DecimalWithPrecisionAndScale<10>, - DecimalWithPrecisionAndScale<19>, DecimalWithPrecisionAndScale<23>, - DecimalWithPrecisionAndScale<27>, DecimalWithPrecisionAndScale<38>, - Decimal256WithPrecisionAndScale<39>, Decimal256WithPrecisionAndScale<56>, - Decimal256WithPrecisionAndScale<76>> + ::arrow::BinaryType, ::arrow::FixedSizeBinaryType, ::arrow::HalfFloatType, + DecimalWithPrecisionAndScale<1>, DecimalWithPrecisionAndScale<5>, + DecimalWithPrecisionAndScale<10>, DecimalWithPrecisionAndScale<19>, + DecimalWithPrecisionAndScale<23>, DecimalWithPrecisionAndScale<27>, + DecimalWithPrecisionAndScale<38>, Decimal256WithPrecisionAndScale<39>, + Decimal256WithPrecisionAndScale<56>, Decimal256WithPrecisionAndScale<76>> TestTypes; TYPED_TEST_SUITE(TestParquetIO, TestTypes); @@ -916,9 +925,15 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) { } TYPED_TEST(TestParquetIO, 
SingleColumnOptionalDictionaryWrite) { - // Skip tests for BOOL as we don't create dictionaries for it. - if (TypeParam::type_id == ::arrow::Type::BOOL) { - return; + switch (TypeParam::type_id) { + // Skip tests for BOOL as we don't create dictionaries for it. + case ::arrow::Type::BOOL: + // Skip tests for HALF_FLOAT as it's not currently supported by `dictionary_encode` + case ::arrow::Type::HALF_FLOAT: + GTEST_SKIP(); + break; + default: + break; } std::shared_ptr values; diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index f11101eb24298..a1cc989ba8ea0 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -851,6 +851,8 @@ TEST_F(TestConvertArrowSchema, ArrowFields) { ParquetType::FIXED_LEN_BYTE_ARRAY, 7}, {"decimal(32, 8)", ::arrow::decimal(32, 8), LogicalType::Decimal(32, 8), ParquetType::FIXED_LEN_BYTE_ARRAY, 14}, + {"float16", ::arrow::float16(), LogicalType::Float16(), + ParquetType::FIXED_LEN_BYTE_ARRAY, 2}, {"time32", ::arrow::time32(::arrow::TimeUnit::MILLI), LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), ParquetType::INT32, -1}, {"time64(microsecond)", ::arrow::time64(::arrow::TimeUnit::MICRO), @@ -906,22 +908,6 @@ TEST_F(TestConvertArrowSchema, ArrowFields) { // ASSERT_NO_FATAL_FAILURE(); } -TEST_F(TestConvertArrowSchema, ArrowNonconvertibleFields) { - struct FieldConstructionArguments { - std::string name; - std::shared_ptr<::arrow::DataType> datatype; - }; - - std::vector cases = { - {"float16", ::arrow::float16()}, - }; - - for (const FieldConstructionArguments& c : cases) { - auto field = ::arrow::field(c.name, c.datatype); - ASSERT_RAISES(NotImplemented, ConvertSchema({field})); - } -} - TEST_F(TestConvertArrowSchema, ParquetFlatPrimitivesAsDictionaries) { std::vector parquet_fields; std::vector> arrow_fields; diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc index 
5146aa12c2c36..f4e3a89e71a31 100644 --- a/cpp/src/parquet/arrow/reader_internal.cc +++ b/cpp/src/parquet/arrow/reader_internal.cc @@ -42,6 +42,7 @@ #include "arrow/util/bit_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/endian.h" +#include "arrow/util/float16.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/ubsan.h" @@ -82,6 +83,7 @@ using ::arrow::bit_util::FromBigEndian; using ::arrow::internal::checked_cast; using ::arrow::internal::checked_pointer_cast; using ::arrow::internal::SafeLeftShift; +using ::arrow::util::Float16; using ::arrow::util::SafeLoadAs; using parquet::internal::BinaryRecordReader; @@ -713,6 +715,77 @@ Status TransferDecimal(RecordReader* reader, MemoryPool* pool, return Status::OK(); } +static inline Status ConvertToHalfFloat(const Array& array, + const std::shared_ptr& type, + MemoryPool* pool, std::shared_ptr* out) { + constexpr int32_t byte_width = sizeof(uint16_t); + DCHECK_EQ(checked_cast(*type).byte_width(), byte_width); + + // We read the halffloat (uint16_t) bytes from a raw binary array, in which they're + // assumed to be little-endian. 
+ const auto& binary_array = checked_cast(array); + DCHECK_EQ(checked_cast(*binary_array.type()) + .byte_width(), + byte_width); + + // Number of elements in the halffloat array + const int64_t length = binary_array.length(); + // Allocate data for the output halffloat array + ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * byte_width, pool)); + uint8_t* out_ptr = data->mutable_data(); + + const int64_t null_count = binary_array.null_count(); + // Copy the values to the output array in native-endian format + if (null_count > 0) { + for (int64_t i = 0; i < length; ++i, out_ptr += byte_width) { + Float16 f16{0}; + if (binary_array.IsValid(i)) { + const uint8_t* in_ptr = binary_array.GetValue(i); + f16 = Float16::FromLittleEndian(in_ptr); + } + f16.ToBytes(out_ptr); + } + } else { +#if ARROW_LITTLE_ENDIAN + // No need to byte-swap, so do a simple copy + std::memcpy(out_ptr, binary_array.raw_values(), length * byte_width); +#else + for (int64_t i = 0; i < length; ++i, out_ptr += byte_width) { + const uint8_t* in_ptr = binary_array.GetValue(i); + Float16::FromLittleEndian(in_ptr).ToBytes(out_ptr); + } +#endif + } + + *out = std::make_shared<::arrow::HalfFloatArray>( + type, length, std::move(data), binary_array.null_bitmap(), null_count); + return Status::OK(); +} + +/// \brief Convert an arrow::BinaryArray to an arrow::HalfFloatArray +/// We do this by: +/// 1. Creating an arrow::BinaryArray from the RecordReader's builder +/// 2. Allocating a buffer for the arrow::HalfFloatArray +/// 3. 
Converting the little-endian bytes in each BinaryArray entry to native-endian +/// halffloat (uint16_t) values +Status TransferHalfFloat(RecordReader* reader, MemoryPool* pool, + const std::shared_ptr& field, Datum* out) { + auto binary_reader = dynamic_cast(reader); + DCHECK(binary_reader); + ::arrow::ArrayVector chunks = binary_reader->GetBuilderChunks(); + for (size_t i = 0; i < chunks.size(); ++i) { + std::shared_ptr chunk_as_half; + RETURN_NOT_OK(ConvertToHalfFloat(*chunks[i], field->type(), pool, &chunk_as_half)); + // Replace the chunk, which will hopefully also free memory as we go + chunks[i] = chunk_as_half; + } + if (!field->nullable()) { + ReconstructChunksWithoutNulls(&chunks); + } + *out = std::make_shared(chunks, field->type()); + return Status::OK(); +} + } // namespace #define TRANSFER_INT32(ENUM, ArrowType) \ @@ -772,6 +845,13 @@ Status TransferColumnData(RecordReader* reader, const std::shared_ptr& va RETURN_NOT_OK(TransferBinary(reader, pool, value_field, &chunked_result)); result = chunked_result; } break; + case ::arrow::Type::HALF_FLOAT: { + if (descr->physical_type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY) { + return Status::Invalid("Physical type for ", value_field->type()->ToString(), + " must be fixed length binary"); + } + RETURN_NOT_OK(TransferHalfFloat(reader, pool, value_field, &result)); + } break; case ::arrow::Type::DECIMAL128: { switch (descr->physical_type()) { case ::parquet::Type::INT32: { diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 3323b7ff8b608..f5484f131eb07 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -397,6 +397,11 @@ Status FieldToNode(const std::string& name, const std::shared_ptr& field, case ArrowTypeId::DURATION: type = ParquetType::INT64; break; + case ArrowTypeId::HALF_FLOAT: + type = ParquetType::FIXED_LEN_BYTE_ARRAY; + logical_type = LogicalType::Float16(); + length = sizeof(uint16_t); + break; case ArrowTypeId::STRUCT: { auto 
struct_type = std::static_pointer_cast<::arrow::StructType>(field->type()); return StructToNode(struct_type, name, field->nullable(), field_id, properties, diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc index da0427cb31000..bb75cce084097 100644 --- a/cpp/src/parquet/arrow/schema_internal.cc +++ b/cpp/src/parquet/arrow/schema_internal.cc @@ -130,6 +130,8 @@ Result> FromFLBA(const LogicalType& logical_type, switch (logical_type.type()) { case LogicalType::Type::DECIMAL: return MakeArrowDecimal(logical_type); + case LogicalType::Type::FLOAT16: + return ::arrow::float16(); case LogicalType::Type::NONE: case LogicalType::Type::INTERVAL: case LogicalType::Type::UUID: diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h index 16c03130c9672..16a0d24a22497 100644 --- a/cpp/src/parquet/arrow/test_util.h +++ b/cpp/src/parquet/arrow/test_util.h @@ -201,8 +201,17 @@ ::arrow::enable_if_floating_point NullableArray( size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr* out) { using c_type = typename ArrowType::c_type; std::vector values; - ::arrow::random_real(size, seed, static_cast(-1e10), static_cast(1e10), - &values); + if constexpr (::arrow::is_half_float_type::value) { + std::vector signed_values; + constexpr int16_t min = 0xf0e2; // -1e4 + constexpr int16_t max = 0x70e2; // +1e4 + ::arrow::randint(size, min, max, &signed_values); + std::transform(signed_values.begin(), signed_values.end(), std::back_inserter(values), + [](int16_t v) { return static_cast(v); }); + } else { + ::arrow::random_real(size, seed, static_cast(-1e10), + static_cast(1e10), &values); + } std::vector valid_bytes(size, 1); for (size_t i = 0; i < num_nulls; i++) { diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 5dff533c1cce2..715432d005492 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -39,6 +39,7 @@ #include "arrow/util/compression.h" 
#include "arrow/util/crc32.h" #include "arrow/util/endian.h" +#include "arrow/util/float16.h" #include "arrow/util/logging.h" #include "arrow/util/rle_encoding.h" #include "arrow/util/type_traits.h" @@ -65,6 +66,7 @@ using arrow::Status; using arrow::bit_util::BitWriter; using arrow::internal::checked_cast; using arrow::internal::checked_pointer_cast; +using arrow::util::Float16; using arrow::util::RleEncoder; namespace bit_util = arrow::bit_util; @@ -2295,6 +2297,74 @@ struct SerializeFunctor< int64_t* scratch; }; +// ---------------------------------------------------------------------- +// Write Arrow to Float16 + +// Requires a custom serializer because Float16s in Parquet are stored as a 2-byte +// (little-endian) FLBA, whereas in Arrow they're a native `uint16_t`. Also, a temporary +// buffer is needed if there's an endian mismatch. +template <> +struct SerializeFunctor<::parquet::FLBAType, ::arrow::HalfFloatType> { + Status Serialize(const ::arrow::HalfFloatArray& array, ArrowWriteContext* ctx, + FLBA* out) { +#if ARROW_LITTLE_ENDIAN + return SerializeInPlace(array, ctx, out); +#else + return SerializeWithScratch(array, ctx, out); +#endif + } + + Status SerializeInPlace(const ::arrow::HalfFloatArray& array, ArrowWriteContext*, + FLBA* out) { + const uint16_t* values = array.raw_values(); + if (array.null_count() == 0) { + for (int64_t i = 0; i < array.length(); ++i) { + out[i] = ToFLBA(&values[i]); + } + } else { + for (int64_t i = 0; i < array.length(); ++i) { + out[i] = array.IsValid(i) ? ToFLBA(&values[i]) : FLBA{}; + } + } + return Status::OK(); + } + + Status SerializeWithScratch(const ::arrow::HalfFloatArray& array, + ArrowWriteContext* ctx, FLBA* out) { + AllocateScratch(array, ctx); + if (array.null_count() == 0) { + for (int64_t i = 0; i < array.length(); ++i) { + out[i] = ToFLBA(array.Value(i)); + } + } else { + for (int64_t i = 0; i < array.length(); ++i) { + out[i] = array.IsValid(i) ? 
ToFLBA(array.Value(i)) : FLBA{}; + } + } + return Status::OK(); + } + + private: + FLBA ToFLBA(const uint16_t* value_ptr) const { + return FLBA{reinterpret_cast(value_ptr)}; + } + FLBA ToFLBA(uint16_t value) { + auto* out = reinterpret_cast(scratch_++); + Float16(value).ToLittleEndian(out); + return FLBA{out}; + } + + void AllocateScratch(const ::arrow::HalfFloatArray& array, ArrowWriteContext* ctx) { + int64_t non_null_count = array.length() - array.null_count(); + int64_t size = non_null_count * sizeof(uint16_t); + scratch_buffer_ = AllocateBuffer(ctx->memory_pool, size); + scratch_ = reinterpret_cast(scratch_buffer_->mutable_data()); + } + + std::shared_ptr scratch_buffer_; + uint16_t* scratch_; +}; + template <> Status TypedColumnWriterImpl::WriteArrowDense( const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, @@ -2303,6 +2373,7 @@ Status TypedColumnWriterImpl::WriteArrowDense( WRITE_SERIALIZE_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryType, FLBAType) WRITE_SERIALIZE_CASE(DECIMAL128, Decimal128Type, FLBAType) WRITE_SERIALIZE_CASE(DECIMAL256, Decimal256Type, FLBAType) + WRITE_SERIALIZE_CASE(HALF_FLOAT, HalfFloatType, FLBAType) default: break; } From a064bec3bcd80901c79a38a66e9775da4673c16f Mon Sep 17 00:00:00 2001 From: benibus Date: Mon, 10 Jul 2023 17:44:20 -0400 Subject: [PATCH 13/37] Fix MSVC truncation warning --- cpp/src/parquet/arrow/test_util.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h index 16a0d24a22497..1ca221c0fb8f6 100644 --- a/cpp/src/parquet/arrow/test_util.h +++ b/cpp/src/parquet/arrow/test_util.h @@ -203,8 +203,8 @@ ::arrow::enable_if_floating_point NullableArray( std::vector values; if constexpr (::arrow::is_half_float_type::value) { std::vector signed_values; - constexpr int16_t min = 0xf0e2; // -1e4 - constexpr int16_t max = 0x70e2; // +1e4 + constexpr auto min = static_cast(0xf0e2); // -1e4 + constexpr auto max = 
static_cast(0x70e2); // +1e4 ::arrow::randint(size, min, max, &signed_values); std::transform(signed_values.begin(), signed_values.end(), std::back_inserter(values), [](int16_t v) { return static_cast(v); }); From 6b3d61ca99f00284e9af63a77afef006ca928c91 Mon Sep 17 00:00:00 2001 From: benibus Date: Mon, 17 Jul 2023 21:01:33 -0400 Subject: [PATCH 14/37] Fix test input generation --- cpp/src/parquet/arrow/test_util.h | 43 ++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h index 1ca221c0fb8f6..74b8a36df8592 100644 --- a/cpp/src/parquet/arrow/test_util.h +++ b/cpp/src/parquet/arrow/test_util.h @@ -65,12 +65,44 @@ struct Decimal256WithPrecisionAndScale { static constexpr int32_t scale = PRECISION - 1; }; +inline std::vector RandomHalfFloatValues(size_t size, uint16_t min, + uint16_t max) { + auto to_signed = [](uint16_t in) -> int16_t { + // Clamp magnitude to exclude representations of NaN/infinity. Within this range, + // binary float16s have the same ordering as int16s after conversion. + int16_t out = static_cast(std::max(in & 0x7fff, 0x7bff)); + // Negate if sign bit is set + return (in & 0x8000) != 0 ? -out : out; + }; + auto to_unsigned = [](int16_t in) -> uint16_t { + uint16_t out = static_cast(std::abs(in)); + // Set sign bit if negative + return in < 0 ? 
(out | 0x8000) : out; + }; + + const auto signed_min = to_signed(min); + const auto signed_max = to_signed(max); + std::vector signed_values; + ::arrow::randint(size, signed_min, signed_max, &signed_values); + + std::vector values(signed_values.size()); + std::transform(signed_values.begin(), signed_values.end(), values.begin(), to_unsigned); + return values; +} + template ::arrow::enable_if_floating_point NonNullArray( size_t size, std::shared_ptr* out) { using c_type = typename ArrowType::c_type; std::vector values; - ::arrow::random_real(size, 0, static_cast(0), static_cast(1), &values); + if constexpr (::arrow::is_half_float_type::value) { + constexpr uint16_t min = 0x0000; // 0.0 + constexpr uint16_t max = 0x3c00; // 1.0 + values = RandomHalfFloatValues(size, min, max); + } else { + ::arrow::random_real(size, 0, static_cast(0), static_cast(1), + &values); + } ::arrow::NumericBuilder builder; RETURN_NOT_OK(builder.AppendValues(values.data(), values.size())); return builder.Finish(out); @@ -202,12 +234,9 @@ ::arrow::enable_if_floating_point NullableArray( using c_type = typename ArrowType::c_type; std::vector values; if constexpr (::arrow::is_half_float_type::value) { - std::vector signed_values; - constexpr auto min = static_cast(0xf0e2); // -1e4 - constexpr auto max = static_cast(0x70e2); // +1e4 - ::arrow::randint(size, min, max, &signed_values); - std::transform(signed_values.begin(), signed_values.end(), std::back_inserter(values), - [](int16_t v) { return static_cast(v); }); + constexpr uint16_t min = 0xf0e2; // -1e4 + constexpr uint16_t max = 0x70e2; // +1e4 + values = RandomHalfFloatValues(size, min, max); } else { ::arrow::random_real(size, seed, static_cast(-1e10), static_cast(1e10), &values); From 17581057a9accdcd8583329926bac842533745f3 Mon Sep 17 00:00:00 2001 From: benibus Date: Mon, 21 Aug 2023 19:04:36 -0400 Subject: [PATCH 15/37] Support conversions to/from float32 --- cpp/src/arrow/util/float16.cc | 163 ++++++++++++++++++++++++++++- 
cpp/src/arrow/util/float16.h | 8 +- cpp/src/arrow/util/float16_test.cc | 91 ++++++++++++++++ 3 files changed, 259 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc index 825cbf0cb1fa3..47e90f1ba050a 100644 --- a/cpp/src/arrow/util/float16.cc +++ b/cpp/src/arrow/util/float16.cc @@ -18,11 +18,172 @@ #include #include "arrow/util/float16.h" +#include "arrow/util/ubsan.h" namespace arrow { namespace util { -std::ostream& operator<<(std::ostream& os, Float16Base arg) { return (os << arg.bits()); } +namespace { + +// -------------------------------------------------------- +// Binary conversions +// -------------------------------------------------------- +// These routines are partially adapted from Numpy's C implementation +// +// Some useful metrics for conversions between different precisions: +// |-----------------------------------------| +// | precision | half | single | double | +// |-----------------------------------------| +// | mantissa | 10 bits | 23 bits | 52 bits | +// | exponent | 5 bits | 8 bits | 11 bits | +// | sign | 1 bit | 1 bit | 1 bit | +// | exp bias | 15 | 127 | 1023 | +// |-----------------------------------------| + +// Converts a IEEE binary32 into a binary16. Rounds to nearest with ties to even +uint16_t Binary32BitsToBinary16Bits(uint32_t f_bits) { + // Sign mask for output binary16 + const uint16_t h_sign = uint16_t((f_bits >> 16) & 0x8000); + + // Exponent mask for input binary32 + const uint32_t f_exp = f_bits & 0x7f800000u; + // Exponents as signed pre-shifted values for convenience. Here, we need to re-bias the + // binary32 exponent for a binary16. If, after re-biasing, the binary16 exponent falls + // outside of the range [1,30] then we need to handle the under/overflow case specially.
+ const int16_t f_biased_exp = int16_t(f_exp >> 23); + const int16_t unbiased_exp = f_biased_exp - 127; + const int16_t h_biased_exp = unbiased_exp + 15; + + // Mantissa mask for input binary32 + const uint32_t f_mant = f_bits & 0x007fffffu; + + // Handle exponent overflow, NaN, and +/-Inf + if (h_biased_exp >= 0x1f) { + // The binary32 is a NaN representation + if (f_biased_exp == 0xff && f_mant != 0) { + uint16_t h_mant = uint16_t(f_mant >> 13); + // If the mantissa bit(s) indicating NaN were shifted out, add one back. Otherwise, + // the result would be infinity. + if (h_mant == 0) { + h_mant = 0x1; + } + return uint16_t(h_sign | 0x7c00u | h_mant); + } + + // Clamp to +/-infinity + return uint16_t(h_sign | 0x7c00u); + } + + // Handle exponent underflow, subnormals, and +/-0 + if (h_biased_exp <= 0) { + // If the underflow exceeds the number of bits in a binary16 mantissa (10) then we + // can't round, so just clamp to 0. Note that this also weeds out any binary32 values + // that are subnormal - including +/-0; + if (h_biased_exp < -10) { + return h_sign; + } + + // Convert to a rounded subnormal value starting with the mantissa. Since the input + // binary32 is known to be normal at this point, we need to prepend its implicit + // leading bit - which also necessitates an additional right-shift. + uint32_t rounded_mant = 0x800000u | f_mant; + rounded_mant >>= (1 - h_biased_exp); + + // Here, we implement rounding to nearest (with ties to even) + // + // By now, our new mantissa has two conceptual ranges: + // - The lower 13 bits, which will be shifted out + // - The upper 10 bits, which will become the binary16's mantissa + // + // We define a "rounding bit", which is the most significant bit to be dropped + // (0x1000). "Rounding to nearest" basically just means that we add 1 to the rounding + // bit. If it's set, then the bit will cascade upwards into the 10-bit mantissa (and + // potentially the exponent). 
+ // + // The only time where we may NOT do this is when a "tie" occurs - i.e. when the + // rounding bit is set but all of the lower bits are 0. In that case, we don't add 1 + // if the retained mantissa is "even" (its least significant bit is 0). + if ((rounded_mant & 0x3fffu) != 0x1000u || (f_mant & 0x7ffu) != 0) { + rounded_mant += 0x1000u; + } + + const uint16_t h_mant = uint16_t(rounded_mant >> 13); + return h_sign + h_mant; + } + + const uint16_t h_exp = uint16_t(h_biased_exp) << 10; + + // See comment on rounding behavior above + uint32_t rounded_mant = f_mant; + if ((rounded_mant & 0x3fffu) != 0x1000u) { + rounded_mant += 0x1000u; + } + + const uint16_t h_mant = uint16_t(rounded_mant >> 13); + // Note that we ADD (rather than OR) the components because we want the carryover bit + // from rounding the mantissa to cascade through the exponent (it shouldn't affect the + // sign bit though). + return h_sign + h_exp + h_mant; +} + +// Converts a IEEE binary16 into a binary32 +uint32_t Binary16BitsToBinary32Bits(uint16_t h_bits) { + // Sign mask for output binary32 + const uint32_t f_sign = uint32_t(h_bits & 0x8000u) << 16; + + // Exponent mask for input binary16 + const uint16_t h_exp = h_bits & 0x7c00; + // Mantissa mask for input binary16 + const uint16_t h_mant = h_bits & 0x3ffu; + + switch (h_exp) { + // Handle Inf and NaN + case 0x7c00u: + return f_sign | 0x7f800000u | (uint32_t(h_mant) << 13); + // Handle zeros and subnormals + case 0x0000u: { + // Input is +/-0 + if (h_mant == 0) { + return f_sign; + } + // Subnormal binary16 to normal binary32 + // + // Start with an f32-biased exponent of 2^-15. We then decrement it until the most + // significant set bit is left-shifted out - as it doesn't get explicitly stored in + // normalized floating point values. Instead, its existence is implied by the new + // exponent. 
+ uint32_t f_exp = 127 - 15; + uint32_t f_mant = uint32_t(h_mant) << 1; + while ((f_mant & 0x0400u) == 0) { + --f_exp; + f_mant <<= 1; + } + f_exp <<= 23; + f_mant = (f_mant & 0x03ffu) << 13; + return f_sign | f_exp | f_mant; + } break; + // Handle normals + default: + // Equivalent to adding (127 - 15) to the exponent and shifting everything by 13. + return f_sign | ((uint32_t(h_bits & 0x7fffu) + 0x1c000u) << 13); + } +} + +} // namespace + +float Float16Base::ToFloat() const { + const uint32_t f_bits = Binary16BitsToBinary32Bits(value_); + return SafeCopy(f_bits); +} + +Float16 Float16::FromFloat(float f) { + const uint32_t f_bits = SafeCopy(f); + return Float16{Binary32BitsToBinary16Bits(f_bits)}; +} + +std::ostream& operator<<(std::ostream& os, Float16Base arg) { + return (os << arg.ToFloat()); +} } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index 74308a09a3cfd..4d431291c1422 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -40,7 +40,7 @@ namespace util { /// /// NOTE: Methods in the class should not mutate the unerlying value or produce copies. /// Such functionality is delegated to subclasses. 
-class Float16Base { +class ARROW_EXPORT Float16Base { public: Float16Base() = default; constexpr explicit Float16Base(uint16_t value) : value_(value) {} @@ -98,6 +98,8 @@ class Float16Base { #endif } + float ToFloat() const; + friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; return Float16Base::CompareEq(lhs, rhs); @@ -149,13 +151,15 @@ class Float16Base { }; /// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t` -class Float16 : public Float16Base { +class ARROW_EXPORT Float16 : public Float16Base { public: using Float16Base::Float16Base; constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } constexpr Float16 operator+() const { return Float16(value_); } + static Float16 FromFloat(float f); + /// \brief Read a `Float16` from memory in native-endian byte order static Float16 FromBytes(const uint8_t* src) { return Float16(SafeLoadAs(src)); diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index 1ccb9db7b0e25..2a7ebd1c5b435 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include @@ -33,6 +34,96 @@ namespace { template using Limits = std::numeric_limits; +float F32(uint32_t bits) { return SafeCopy(bits); } + +TEST(Float16Test, RoundTripFromFloat32) { + struct TestCase { + float f32; + uint16_t b16; + float f16_as_f32; + }; + // Expected values were also manually validated with numpy-1.24.3 + const TestCase test_cases[] = { + // +/-0.0f + {F32(0x80000000u), 0b1000000000000000u, -0.0f}, + {F32(0x00000000u), 0b0000000000000000u, +0.0f}, + // 32-bit exp is 102 => 2^-25. Rounding to nearest. + {F32(0xb3000001u), 0b1000000000000001u, -5.96046447754e-8f}, + // 32-bit exp is 102 => 2^-25. Rounding to even. + {F32(0xb3000000u), 0b1000000000000000u, -0.0f}, + // 32-bit exp is 101 => 2^-26. Underflow to zero. 
+ {F32(0xb2800001u), 0b1000000000000000u, -0.0f}, + // 32-bit exp is 108 => 2^-19. + {F32(0xb61a0000u), 0b1000000000100110u, -2.26497650146e-6f}, + // 32-bit exp is 108 => 2^-19. + {F32(0xb61e0000u), 0b1000000000101000u, -2.38418579102e-6f}, + // 32-bit exp is 112 => 2^-15. Rounding to nearest. + {F32(0xb87fa001u), 0b1000001111111111u, -6.09755516052e-5f}, + // 32-bit exp is 112 => 2^-15. Rounds to 16-bit exp of 1 => 2^-14 + {F32(0xb87fe001u), 0b1000010000000000u, -6.103515625e-5f}, + // 32-bit exp is 142 => 2^15. Rounding to nearest. + {F32(0xc7001001u), 0b1111100000000001u, -32800.0f}, + // 32-bit exp is 142 => 2^15. Rounding to even. + {F32(0xc7001000u), 0b1111100000000000u, -32768.0f}, + // 65520.0f rounds to inf + {F32(0x477ff000u), 0b0111110000000000u, Limits::infinity()}, + // 65488.0039062f rounds to 65504.0 (float16 max) + {F32(0x477fd001u), 0b0111101111111111u, 65504.0f}, + // 32-bit exp is 127 => 2^0, rounds to 16-bit exp of 16 => 2^1. + {F32(0xbffff000u), 0b1100000000000000u, -2.0f}, + }; + + for (size_t index = 0; index < std::size(test_cases); ++index) { + ARROW_SCOPED_TRACE("index=", index); + const auto& tc = test_cases[index]; + const auto f16 = Float16::FromFloat(tc.f32); + EXPECT_EQ(tc.b16, f16.bits()); + EXPECT_EQ(tc.f16_as_f32, f16.ToFloat()); + } +} + +TEST(Float16Test, RoundTripFromFloat32Nan) { + const float nan_test_cases[] = { + Limits::quiet_NaN(), F32(0x7f800001u), F32(0xff800001u), F32(0x7fc00000u), + F32(0xff800001u), F32(0x7fffffffu), F32(0xffffffffu)}; + + for (size_t i = 0; i < std::size(nan_test_cases); ++i) { + ARROW_SCOPED_TRACE("i=", i); + const auto f32 = nan_test_cases[i]; + + ASSERT_TRUE(std::isnan(f32)); + const bool sign = std::signbit(f32); + + const auto f16 = Float16::FromFloat(f32); + EXPECT_TRUE(f16.is_nan()); + EXPECT_EQ(sign, f16.signbit()); + + const auto f16_as_f32 = f16.ToFloat(); + EXPECT_TRUE(std::isnan(f16_as_f32)); + EXPECT_EQ(sign, std::signbit(f16_as_f32)); + } +} + +TEST(Float16Test, 
RoundTripFromFloat32Inf) { + const float test_cases[] = {+Limits::infinity(), -Limits::infinity()}; + + for (size_t i = 0; i < std::size(test_cases); ++i) { + ARROW_SCOPED_TRACE("i=", i); + const auto f32 = test_cases[i]; + + ASSERT_TRUE(std::isinf(f32)); + const bool sign = std::signbit(f32); + + const auto f16 = Float16::FromFloat(f32); + EXPECT_TRUE(f16.is_infinity()); + EXPECT_EQ(sign, f16.signbit()); + + const auto f16_as_f32 = f16.ToFloat(); + EXPECT_TRUE(std::isinf(f16_as_f32)); + EXPECT_EQ(sign, std::signbit(f16_as_f32)); + } +} + // Holds a float16 and its equivalent float32 struct TestValue { TestValue(Float16 f16, float f32) : f16(f16), f32(f32) {} From d41a0c579d7b4deacb210805fb2b3fe0e2d84e7e Mon Sep 17 00:00:00 2001 From: benibus Date: Mon, 21 Aug 2023 19:53:40 -0400 Subject: [PATCH 16/37] Remove `Float16Base` class --- cpp/src/arrow/util/float16.cc | 6 +- cpp/src/arrow/util/float16.h | 100 +++++++++++++---------------- cpp/src/parquet/statistics_test.cc | 22 +++---- 3 files changed, 55 insertions(+), 73 deletions(-) diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc index 47e90f1ba050a..560a5e270396d 100644 --- a/cpp/src/arrow/util/float16.cc +++ b/cpp/src/arrow/util/float16.cc @@ -171,7 +171,7 @@ uint32_t Binary16BitsToBinary32Bits(uint16_t h_bits) { } // namespace -float Float16Base::ToFloat() const { +float Float16::ToFloat() const { const uint32_t f_bits = Binary16BitsToBinary32Bits(value_); return SafeCopy(f_bits); } @@ -181,9 +181,7 @@ Float16 Float16::FromFloat(float f) { return Float16{Binary32BitsToBinary16Bits(f_bits)}; } -std::ostream& operator<<(std::ostream& os, Float16Base arg) { - return (os << arg.ToFloat()); -} +std::ostream& operator<<(std::ostream& os, Float16 arg) { return (os << arg.ToFloat()); } } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index 4d431291c1422..d36d164f8fbad 100644 --- a/cpp/src/arrow/util/float16.h +++ 
b/cpp/src/arrow/util/float16.h @@ -31,19 +31,35 @@ namespace arrow { namespace util { -/// \brief Base class for an IEEE half-precision float, encoded as a `uint16_t` +/// \brief Class representing an IEEE half-precision float, encoded as a `uint16_t` /// -/// The exact format is as follows (from MSB to LSB): -/// - bit 0: sign -/// - bits 1-5: exponent -/// - bits 6-15: mantissa +/// The exact format is as follows (from LSB to MSB): +/// - bits 0-10: mantissa +/// - bits 10-15: exponent +/// - bit 15: sign /// -/// NOTE: Methods in the class should not mutate the unerlying value or produce copies. -/// Such functionality is delegated to subclasses. -class ARROW_EXPORT Float16Base { +class ARROW_EXPORT Float16 { public: - Float16Base() = default; - constexpr explicit Float16Base(uint16_t value) : value_(value) {} + Float16() = default; + constexpr explicit Float16(uint16_t value) : value_(value) {} + + /// \brief Create a `Float16` from a 32-bit float (may lose precision) + static Float16 FromFloat(float f); + + /// \brief Read a `Float16` from memory in native-endian byte order + static Float16 FromBytes(const uint8_t* src) { + return Float16(SafeLoadAs(src)); + } + + /// \brief Read a `Float16` from memory in little-endian byte order + static Float16 FromLittleEndian(const uint8_t* src) { + return Float16(bit_util::FromLittleEndian(SafeLoadAs(src))); + } + + /// \brief Read a `Float16` from memory in big-endian byte order + static Float16 FromBigEndian(const uint8_t* src) { + return Float16(bit_util::FromBigEndian(SafeLoadAs(src))); + } /// \brief Return the value's integer representation constexpr uint16_t bits() const { return value_; } @@ -61,6 +77,9 @@ class ARROW_EXPORT Float16Base { /// \brief Return true if the value is positive/negative zero constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } + /// \brief Convert to a 32-bit float + float ToFloat() const; + /// \brief Copy the value's bytes in native-endian byte order void ToBytes(uint8_t* 
dest) const { std::memcpy(dest, &value_, sizeof(value_)); } /// \brief Return the value's bytes in native-endian byte order @@ -74,7 +93,7 @@ class ARROW_EXPORT Float16Base { /// \brief Copy the value's bytes in little-endian byte order void ToLittleEndian(uint8_t* dest) const { - Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(dest); + Float16{bit_util::ToLittleEndian(value_)}.ToBytes(dest); } /// \brief Return the value's bytes in little-endian byte order constexpr std::array ToLittleEndian() const { @@ -87,7 +106,7 @@ class ARROW_EXPORT Float16Base { /// \brief Copy the value's bytes in big-endian byte order void ToBigEndian(uint8_t* dest) const { - Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(dest); + Float16{bit_util::ToBigEndian(value_)}.ToBytes(dest); } /// \brief Return the value's bytes in big-endian byte order constexpr std::array ToBigEndian() const { @@ -98,41 +117,38 @@ class ARROW_EXPORT Float16Base { #endif } - float ToFloat() const; + constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } + constexpr Float16 operator+() const { return Float16(value_); } - friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) { + friend constexpr bool operator==(Float16 lhs, Float16 rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; - return Float16Base::CompareEq(lhs, rhs); - } - friend constexpr bool operator!=(Float16Base lhs, Float16Base rhs) { - return !(lhs == rhs); + return Float16::CompareEq(lhs, rhs); } + friend constexpr bool operator!=(Float16 lhs, Float16 rhs) { return !(lhs == rhs); } - friend constexpr bool operator<(Float16Base lhs, Float16Base rhs) { + friend constexpr bool operator<(Float16 lhs, Float16 rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; - return Float16Base::CompareLt(lhs, rhs); + return Float16::CompareLt(lhs, rhs); } - friend constexpr bool operator>(Float16Base lhs, Float16Base rhs) { return rhs < lhs; } + friend constexpr bool operator>(Float16 lhs, Float16 rhs) { return rhs < 
lhs; } - friend constexpr bool operator<=(Float16Base lhs, Float16Base rhs) { + friend constexpr bool operator<=(Float16 lhs, Float16 rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; - return !Float16Base::CompareLt(rhs, lhs); - } - friend constexpr bool operator>=(Float16Base lhs, Float16Base rhs) { - return rhs <= lhs; + return !Float16::CompareLt(rhs, lhs); } + friend constexpr bool operator>=(Float16 lhs, Float16 rhs) { return rhs <= lhs; } - ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16Base arg); + ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg); protected: uint16_t value_; private: // Comparison helpers that assume neither operand is NaN - static constexpr bool CompareEq(Float16Base lhs, Float16Base rhs) { + static constexpr bool CompareEq(Float16 lhs, Float16 rhs) { return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero()); } - static constexpr bool CompareLt(Float16Base lhs, Float16Base rhs) { + static constexpr bool CompareLt(Float16 lhs, Float16 rhs) { if (lhs.signbit()) { if (rhs.signbit()) { // Both are negative @@ -150,32 +166,6 @@ class ARROW_EXPORT Float16Base { } }; -/// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t` -class ARROW_EXPORT Float16 : public Float16Base { - public: - using Float16Base::Float16Base; - - constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } - constexpr Float16 operator+() const { return Float16(value_); } - - static Float16 FromFloat(float f); - - /// \brief Read a `Float16` from memory in native-endian byte order - static Float16 FromBytes(const uint8_t* src) { - return Float16(SafeLoadAs(src)); - } - - /// \brief Read a `Float16` from memory in little-endian byte order - static Float16 FromLittleEndian(const uint8_t* src) { - return Float16(bit_util::FromLittleEndian(SafeLoadAs(src))); - } - - /// \brief Read a `Float16` from memory in big-endian byte order - static Float16 
FromBigEndian(const uint8_t* src) { - return Float16(bit_util::FromBigEndian(SafeLoadAs(src))); - } -}; - static_assert(std::is_trivial_v); } // namespace util diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index 7de4e3f3840bf..789c42f379799 100644 --- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -63,22 +63,16 @@ using schema::PrimitiveNode; namespace test { -class BufferedFloat16 : public ::arrow::util::Float16Base { - public: - explicit BufferedFloat16(Float16 f16) : Float16Base(f16) { - buffer_ = *::arrow::AllocateBuffer(sizeof(value_)); - ToLittleEndian(buffer_->mutable_data()); +struct BufferedFloat16 { + explicit BufferedFloat16(Float16 f16) + : f16(f16), buffer(*::arrow::AllocateBuffer(sizeof(uint16_t))) { + this->f16.ToLittleEndian(buffer->mutable_data()); } - explicit BufferedFloat16(uint16_t value) : BufferedFloat16(Float16(value)) {} - - const uint8_t* bytes() const { return buffer_->data(); } - const std::shared_ptr<::arrow::Buffer>& buffer() { return buffer_; } - - BufferedFloat16 operator+() const { return *this; } - BufferedFloat16 operator-() const { return BufferedFloat16(value_ ^ 0x8000); } + explicit BufferedFloat16(uint16_t bits) : BufferedFloat16(Float16(bits)) {} + const uint8_t* bytes() const { return buffer->data(); } - private: - std::shared_ptr<::arrow::Buffer> buffer_; + Float16 f16; + std::shared_ptr<::arrow::Buffer> buffer; }; // ---------------------------------------------------------------------- From aaef4b4c76260dbf0d386f4d89332be4e40308e5 Mon Sep 17 00:00:00 2001 From: benibus Date: Tue, 22 Aug 2023 18:23:12 -0400 Subject: [PATCH 17/37] Update/restructure comparison tests --- cpp/src/arrow/util/float16_test.cc | 135 +++++++++++++---------------- 1 file changed, 61 insertions(+), 74 deletions(-) diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index 2a7ebd1c5b435..e13cecc0f9046 100644 --- 
a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -124,92 +124,79 @@ TEST(Float16Test, RoundTripFromFloat32Inf) { } } -// Holds a float16 and its equivalent float32 -struct TestValue { - TestValue(Float16 f16, float f32) : f16(f16), f32(f32) {} - TestValue(uint16_t u16, float f32) : TestValue(Float16(u16), f32) {} +TEST(Float16Test, Compare) { + constexpr float f32_inf = Limits::infinity(); + constexpr float f32_nan = Limits::quiet_NaN(); - Float16 f16; - float f32; -}; - -#define GENERATE_OPERATOR(NAME, OP) \ - struct NAME { \ - std::pair operator()(TestValue l, TestValue r) { \ - return std::make_pair((l.f32 OP r.f32), (l.f16 OP r.f16)); \ - } \ - } - -GENERATE_OPERATOR(CompareEq, ==); -GENERATE_OPERATOR(CompareNe, !=); -GENERATE_OPERATOR(CompareLt, <); -GENERATE_OPERATOR(CompareGt, >); -GENERATE_OPERATOR(CompareLe, <=); -GENERATE_OPERATOR(CompareGe, >=); - -#undef GENERATE_OPERATOR - -const std::vector g_test_values = { - TestValue(Limits::min(), +0.00006104f), - TestValue(Limits::max(), +65504.0f), - TestValue(Limits::lowest(), -65504.0f), - TestValue(+Limits::infinity(), +Limits::infinity()), - TestValue(-Limits::infinity(), -Limits::infinity()), - // Multiple (semantically equivalent) NaN representations - TestValue(0x7fff, Limits::quiet_NaN()), - TestValue(0xffff, Limits::quiet_NaN()), - TestValue(0x7e00, Limits::quiet_NaN()), - TestValue(0xfe00, Limits::quiet_NaN()), - // Positive/negative zeroes - TestValue(0x0000, +0.0f), - TestValue(0x8000, -0.0f), - // Miscellaneous values. 
In general, they're chosen to test the sign/exponent and - // exponent/mantissa boundaries - TestValue(0x101c, +0.000502f), - TestValue(0x901c, -0.000502f), - TestValue(0x101d, +0.0005022f), - TestValue(0x901d, -0.0005022f), - TestValue(0x121c, +0.000746f), - TestValue(0x921c, -0.000746f), - TestValue(0x141c, +0.001004f), - TestValue(0x941c, -0.001004f), - TestValue(0x501c, +32.9f), - TestValue(0xd01c, -32.9f), - // A few subnormals for good measure - TestValue(0x001c, +0.0000017f), - TestValue(0x801c, -0.0000017f), - TestValue(0x021c, +0.0000332f), - TestValue(0x821c, -0.0000332f), -}; + const struct { + Float16 f16; + float f32; + } test_values[] = { + {Limits::min(), +6.103515625e-05f}, + {Limits::max(), +65504.0f}, + {Limits::lowest(), -65504.0f}, + {+Limits::infinity(), +f32_inf}, + {-Limits::infinity(), -f32_inf}, + // Multiple (semantically equivalent) NaN representations + {Float16(0x7e00), f32_nan}, + {Float16(0xfe00), f32_nan}, + {Float16(0x7fff), f32_nan}, + {Float16(0xffff), f32_nan}, + // Positive/negative zeros + {Float16(0x0000), +0.0f}, + {Float16(0x8000), -0.0f}, + // Miscellaneous values. 
In general, they're chosen to test the sign/exponent and + // exponent/mantissa boundaries + {Float16(0x101c), +0.00050163269043f}, + {Float16(0x901c), -0.00050163269043f}, + {Float16(0x101d), +0.000502109527588f}, + {Float16(0x901d), -0.000502109527588f}, + {Float16(0x121c), +0.00074577331543f}, + {Float16(0x921c), -0.00074577331543f}, + {Float16(0x141c), +0.00100326538086f}, + {Float16(0x941c), -0.00100326538086f}, + {Float16(0x501c), +32.875f}, + {Float16(0xd01c), -32.875f}, + // A few subnormals for good measure + {Float16(0x001c), +1.66893005371e-06f}, + {Float16(0x801c), -1.66893005371e-06f}, + {Float16(0x021c), +3.21865081787e-05f}, + {Float16(0x821c), -3.21865081787e-05f}, + }; -template -class Float16OperatorTest : public ::testing::Test { - public: - void TestCompare(const std::vector& test_values) { - const auto num_values = static_cast(test_values.size()); + auto expect_op = [&](std::string op_name, auto op) { + ARROW_SCOPED_TRACE(op_name); + const auto num_values = static_cast(std::size(test_values)); // Check all combinations of operands in both directions for (int i = 0; i < num_values; ++i) { for (int j = 0; j < num_values; ++j) { - ARROW_SCOPED_TRACE(i, ",", j); - - auto a = test_values[i]; - auto b = test_values[j]; + auto [a16, a32] = test_values[i]; + auto [b16, b32] = test_values[j]; + ARROW_SCOPED_TRACE("[", i, ",", j, "] = ", a16, ",", b16); // Results for float16 and float32 should be the same - auto ret = Operator{}(a, b); - ASSERT_EQ(ret.first, ret.second); + ASSERT_EQ(op(a16, b16), op(a32, b32)); } } - } -}; - -using OperatorTypes = - ::testing::Types; + }; -TYPED_TEST_SUITE(Float16OperatorTest, OperatorTypes); + // Verify that our "equivalent" 16/32-bit values actually are + for (const auto& v : test_values) { + if (std::isnan(v.f32)) { + ASSERT_TRUE(std::isnan(v.f16.ToFloat())); + } else { + ASSERT_EQ(v.f32, v.f16.ToFloat()); + } + } -TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); } + expect_op("equal", 
[](auto l, auto r) { return l == r; }); + expect_op("not_equal", [](auto l, auto r) { return l != r; }); + expect_op("less", [](auto l, auto r) { return l < r; }); + expect_op("greater", [](auto l, auto r) { return l > r; }); + expect_op("less_equal", [](auto l, auto r) { return l <= r; }); + expect_op("greater_equal", [](auto l, auto r) { return l >= r; }); +} TEST(Float16Test, ToBytes) { constexpr auto f16 = Float16(0xd01c); From 9e5cf1444631169e758434154d85b9a2cbda690a Mon Sep 17 00:00:00 2001 From: benibus Date: Fri, 25 Aug 2023 20:30:49 -0400 Subject: [PATCH 18/37] Fix comment --- cpp/src/arrow/util/float16.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc index 560a5e270396d..5bdcdfab7ec49 100644 --- a/cpp/src/arrow/util/float16.cc +++ b/cpp/src/arrow/util/float16.cc @@ -34,7 +34,7 @@ namespace { // |-----------------------------------------| // | precision | half | single | double | // |-----------------------------------------| -// | mantissa | 10 bits | 23 bits | 53 bits | +// | mantissa | 10 bits | 23 bits | 52 bits | // | exponent | 5 bits | 8 bits | 11 bits | // | sign | 1 bit | 1 bit | 1 bit | // | exp bias | 15 | 127 | 1023 | From e12498654a502fc71c448b7468cd010a98b7b9c2 Mon Sep 17 00:00:00 2001 From: benibus Date: Fri, 25 Aug 2023 23:43:41 -0400 Subject: [PATCH 19/37] Minor changes to Float16 class/tests --- cpp/src/arrow/util/float16.h | 12 ++++++++---- cpp/src/arrow/util/float16_test.cc | 5 +++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index d36d164f8fbad..7c8597a8ec542 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -53,18 +53,20 @@ class ARROW_EXPORT Float16 { /// \brief Read a `Float16` from memory in little-endian byte order static Float16 FromLittleEndian(const uint8_t* src) { - return Float16(bit_util::FromLittleEndian(SafeLoadAs(src))); + return 
Float16(::arrow::bit_util::FromLittleEndian(SafeLoadAs(src))); } /// \brief Read a `Float16` from memory in big-endian byte order static Float16 FromBigEndian(const uint8_t* src) { - return Float16(bit_util::FromBigEndian(SafeLoadAs(src))); + return Float16(::arrow::bit_util::FromBigEndian(SafeLoadAs(src))); } /// \brief Return the value's integer representation constexpr uint16_t bits() const { return value_; } constexpr explicit operator uint16_t() const { return bits(); } + explicit operator float() const { return ToFloat(); } + /// \brief Return true if the value is negative (sign bit is set) constexpr bool signbit() const { return (value_ & 0x8000) != 0; } @@ -74,6 +76,8 @@ class ARROW_EXPORT Float16 { } /// \brief Return true if the value is positive/negative infinity constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; } + /// \brief Return true if the value is finite and not NaN + constexpr bool is_finite() const { return (value_ & 0x7c00) != 0x7c00; } /// \brief Return true if the value is positive/negative zero constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } @@ -93,7 +97,7 @@ class ARROW_EXPORT Float16 { /// \brief Copy the value's bytes in little-endian byte order void ToLittleEndian(uint8_t* dest) const { - Float16{bit_util::ToLittleEndian(value_)}.ToBytes(dest); + Float16{::arrow::bit_util::ToLittleEndian(value_)}.ToBytes(dest); } /// \brief Return the value's bytes in little-endian byte order constexpr std::array ToLittleEndian() const { @@ -106,7 +110,7 @@ class ARROW_EXPORT Float16 { /// \brief Copy the value's bytes in big-endian byte order void ToBigEndian(uint8_t* dest) const { - Float16{bit_util::ToBigEndian(value_)}.ToBytes(dest); + Float16{::arrow::bit_util::ToBigEndian(value_)}.ToBytes(dest); } /// \brief Return the value's bytes in big-endian byte order constexpr std::array ToBigEndian() const { diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index 
e13cecc0f9046..d69bf6954ce77 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -79,6 +79,11 @@ TEST(Float16Test, RoundTripFromFloat32) { const auto f16 = Float16::FromFloat(tc.f32); EXPECT_EQ(tc.b16, f16.bits()); EXPECT_EQ(tc.f16_as_f32, f16.ToFloat()); + + EXPECT_EQ(std::signbit(tc.f16_as_f32), f16.signbit()); + EXPECT_EQ(std::isnan(tc.f16_as_f32), f16.is_nan()); + EXPECT_EQ(std::isinf(tc.f16_as_f32), f16.is_infinity()); + EXPECT_EQ(std::isfinite(tc.f16_as_f32), f16.is_finite()); } } From a12176ff614929584aa0b3e98706ef6d45ed5f4f Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 31 Aug 2023 15:34:17 -0400 Subject: [PATCH 20/37] Update statistics and tests --- cpp/src/parquet/statistics.cc | 15 ++++---- cpp/src/parquet/statistics_test.cc | 59 ++++++++++++------------------ 2 files changed, 32 insertions(+), 42 deletions(-) diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index a7691dd568796..73caf2b46f555 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -558,7 +558,8 @@ class TypedStatisticsImpl : public TypedStatistics { : descr_(descr), pool_(pool), min_buffer_(AllocateBuffer(pool_, 0)), - max_buffer_(AllocateBuffer(pool_, 0)) { + max_buffer_(AllocateBuffer(pool_, 0)), + logical_type_(LogicalTypeId(descr_)) { comparator_ = MakeComparator(descr); TypedStatisticsImpl::Reset(); } @@ -628,13 +629,12 @@ class TypedStatisticsImpl : public TypedStatistics { bool Equals(const Statistics& raw_other) const override { if (physical_type() != raw_other.physical_type()) return false; - const auto logical_id = LogicalTypeId(*this); - const auto other_logical_id = LogicalTypeId(raw_other); + const auto other_logical_type = LogicalTypeId(raw_other); // Only compare against logical types that influence the interpretation of the // physical type - if (IsMeaningfulLogicalType(logical_id)) { - if (logical_id != other_logical_id) return false; - } else if 
(IsMeaningfulLogicalType(other_logical_id)) { + if (IsMeaningfulLogicalType(logical_type_)) { + if (logical_type_ != other_logical_type) return false; + } else if (IsMeaningfulLogicalType(other_logical_type)) { return false; } @@ -763,6 +763,7 @@ class TypedStatisticsImpl : public TypedStatistics { EncodedStatistics statistics_; std::shared_ptr> comparator_; std::shared_ptr min_buffer_, max_buffer_; + LogicalType::Type::type logical_type_ = LogicalType::Type::NONE; void PlainEncode(const T& src, std::string* dst) const; void PlainDecode(const std::string& src, T* dst) const; @@ -794,7 +795,7 @@ class TypedStatisticsImpl : public TypedStatistics { void SetMinMaxPair(std::pair min_max) { // CleanStatistic can return a nullopt in case of erroneous values, e.g. NaN - auto maybe_min_max = CleanStatistic(min_max, LogicalTypeId(*this)); + auto maybe_min_max = CleanStatistic(min_max, logical_type_); if (!maybe_min_max) return; auto min = maybe_min_max.value().first; diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index 789c42f379799..cb2e6455abfa9 100644 --- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -63,18 +63,6 @@ using schema::PrimitiveNode; namespace test { -struct BufferedFloat16 { - explicit BufferedFloat16(Float16 f16) - : f16(f16), buffer(*::arrow::AllocateBuffer(sizeof(uint16_t))) { - this->f16.ToLittleEndian(buffer->mutable_data()); - } - explicit BufferedFloat16(uint16_t bits) : BufferedFloat16(Float16(bits)) {} - const uint8_t* bytes() const { return buffer->data(); } - - Float16 f16; - std::shared_ptr<::arrow::Buffer> buffer; -}; - // ---------------------------------------------------------------------- // Test comparators @@ -1142,30 +1130,24 @@ void TestStatisticsSortOrder::SetValues() { constexpr int kValueLen = 2; constexpr int kNumBytes = NUM_VALUES * kValueLen; - const uint16_t u16_vals[NUM_VALUES] = { - 0b1100010100000000, // -5.0 - 0b1100010000000000, // -4.0 - 
0b1100001000000000, // -3.0 - 0b1100000000000000, // -2.0 - 0b1011110000000000, // -1.0 - 0b0000000000000000, // +0.0 - 0b0011110000000000, // +1.0 - 0b0100000000000000, // +2.0 - 0b0100001000000000, // +3.0 - 0b0100010000000000, // +4.0 + const Float16 f16_vals[NUM_VALUES] = { + Float16::FromFloat(+2.0f), Float16::FromFloat(-4.0f), Float16::FromFloat(+4.0f), + Float16::FromFloat(-2.0f), Float16::FromFloat(-1.0f), Float16::FromFloat(+3.0f), + Float16::FromFloat(+1.0f), Float16::FromFloat(-5.0f), Float16::FromFloat(+0.0f), + Float16::FromFloat(-3.0f), }; values_buf_.resize(kNumBytes); uint8_t* ptr = values_buf_.data(); for (int i = 0; i < NUM_VALUES; ++i) { - Float16(u16_vals[i]).ToLittleEndian(ptr); + f16_vals[i].ToLittleEndian(ptr); values_[i].ptr = ptr; ptr += kValueLen; } stats_[0] - .set_min(std::string(reinterpret_cast(values_[0].ptr), kValueLen)) - .set_max(std::string(reinterpret_cast(values_[9].ptr), kValueLen)); + .set_min(std::string(reinterpret_cast(values_[7].ptr), kValueLen)) + .set_max(std::string(reinterpret_cast(values_[2].ptr), kValueLen)); } TYPED_TEST_SUITE(TestStatisticsSortOrder, CompareTestTypes); @@ -1503,6 +1485,17 @@ void TestFloatStatistics::TestNaNs() { valid_bitmap_no_nans); } +struct BufferedFloat16 { + explicit BufferedFloat16(Float16 f16) : f16(f16) { + this->f16.ToLittleEndian(bytes_.data()); + } + explicit BufferedFloat16(float f) : BufferedFloat16(Float16::FromFloat(f)) {} + const uint8_t* bytes() const { return bytes_.data(); } + + Float16 f16; + std::array bytes_; +}; + template <> void TestFloatStatistics::TestNaNs() { constexpr int kNumValues = 8; @@ -1512,22 +1505,18 @@ void TestFloatStatistics::TestNaNs() { using F16 = BufferedFloat16; const auto nan_f16 = F16(std::numeric_limits::quiet_NaN()); - const auto min_f16 = F16(0xc400); // -4.0 - const auto max_f16 = F16(0x4200); // +3.0 + const auto min_f16 = F16(-4.0f); + const auto max_f16 = F16(+3.0f); const auto min = FLBA{min_f16.bytes()}; const auto max = 
FLBA{max_f16.bytes()}; std::array all_nans_f16 = {nan_f16, nan_f16, nan_f16, nan_f16, nan_f16, nan_f16, nan_f16, nan_f16}; - std::array some_nans_f16 = {nan_f16, max_f16, - F16(0xc200), // -3.0 - F16(0xbc00), // -1.0 - nan_f16, - F16(0x4000), // +2.0 - min_f16, nan_f16}; + std::array some_nans_f16 = { + nan_f16, max_f16, F16(-3.0f), F16(-1.0f), nan_f16, F16(+2.0f), min_f16, nan_f16}; std::array other_nans_f16 = some_nans_f16; - other_nans_f16[0] = F16(0x3e00); // +1.5 + other_nans_f16[0] = F16(+1.5f); // +1.5 auto prepare_values = [](const auto& values) -> std::vector { std::vector out(values.size()); From 6496aefca6d979d1b9a636608d885097570a47e4 Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 31 Aug 2023 15:38:40 -0400 Subject: [PATCH 21/37] Update Arrow reader --- cpp/src/parquet/arrow/reader_internal.cc | 79 ++++-------------------- 1 file changed, 12 insertions(+), 67 deletions(-) diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc index f4e3a89e71a31..e5aef5a45b5f3 100644 --- a/cpp/src/parquet/arrow/reader_internal.cc +++ b/cpp/src/parquet/arrow/reader_internal.cc @@ -715,74 +715,14 @@ Status TransferDecimal(RecordReader* reader, MemoryPool* pool, return Status::OK(); } -static inline Status ConvertToHalfFloat(const Array& array, - const std::shared_ptr& type, - MemoryPool* pool, std::shared_ptr* out) { - constexpr int32_t byte_width = sizeof(uint16_t); - DCHECK_EQ(checked_cast(*type).byte_width(), byte_width); - - // We read the halffloat (uint16_t) bytes from a raw binary array, in which they're - // assumed to be little-endian. 
- const auto& binary_array = checked_cast(array); - DCHECK_EQ(checked_cast(*binary_array.type()) - .byte_width(), - byte_width); - - // Number of elements in the halffloat array - const int64_t length = binary_array.length(); - // Allocate data for the output halffloat array - ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * byte_width, pool)); - uint8_t* out_ptr = data->mutable_data(); - - const int64_t null_count = binary_array.null_count(); - // Copy the values to the output array in native-endian format - if (null_count > 0) { - for (int64_t i = 0; i < length; ++i, out_ptr += byte_width) { - Float16 f16{0}; - if (binary_array.IsValid(i)) { - const uint8_t* in_ptr = binary_array.GetValue(i); - f16 = Float16::FromLittleEndian(in_ptr); - } - f16.ToBytes(out_ptr); - } - } else { -#if ARROW_LITTLE_ENDIAN - // No need to byte-swap, so do a simple copy - std::memcpy(out_ptr, binary_array.raw_values(), length * byte_width); -#else - for (int64_t i = 0; i < length; ++i, out_ptr += byte_width) { - const uint8_t* in_ptr = binary_array.GetValue(i); - Float16::FromLittleEndian(in_ptr).ToBytes(out_ptr); - } -#endif - } - - *out = std::make_shared<::arrow::HalfFloatArray>( - type, length, std::move(data), binary_array.null_bitmap(), null_count); - return Status::OK(); -} - -/// \brief Convert an arrow::BinaryArray to an arrow::HalfFloatArray -/// We do this by: -/// 1. Creating an arrow::BinaryArray from the RecordReader's builder -/// 2. Allocating a buffer for the arrow::HalfFloatArray -/// 3. 
Converting the little-endian bytes in each BinaryArray entry to native-endian -/// halffloat (uint16_t) values Status TransferHalfFloat(RecordReader* reader, MemoryPool* pool, const std::shared_ptr& field, Datum* out) { - auto binary_reader = dynamic_cast(reader); - DCHECK(binary_reader); - ::arrow::ArrayVector chunks = binary_reader->GetBuilderChunks(); - for (size_t i = 0; i < chunks.size(); ++i) { - std::shared_ptr chunk_as_half; - RETURN_NOT_OK(ConvertToHalfFloat(*chunks[i], field->type(), pool, &chunk_as_half)); - // Replace the chunk, which will hopefully also free memory as we go - chunks[i] = chunk_as_half; - } - if (!field->nullable()) { - ReconstructChunksWithoutNulls(&chunks); - } - *out = std::make_shared(chunks, field->type()); + static const auto binary_type = ::arrow::fixed_size_binary(2); + // Read as a FixedSizeBinaryArray - then, view as a HalfFloatArray + std::shared_ptr chunked_array; + RETURN_NOT_OK( + TransferBinary(reader, pool, field->WithType(binary_type), &chunked_array)); + ARROW_ASSIGN_OR_RAISE(*out, chunked_array->View(field->type())); return Status::OK(); } @@ -846,10 +786,15 @@ Status TransferColumnData(RecordReader* reader, const std::shared_ptr& va result = chunked_result; } break; case ::arrow::Type::HALF_FLOAT: { + const auto& type = *value_field->type(); if (descr->physical_type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY) { - return Status::Invalid("Physical type for ", value_field->type()->ToString(), + return Status::Invalid("Physical type for ", type.ToString(), " must be fixed length binary"); } + if (descr->type_length() != type.byte_width()) { + return Status::Invalid("Fixed length binary type for ", type.ToString(), + " must have a byte width of ", type.byte_width()); + } RETURN_NOT_OK(TransferHalfFloat(reader, pool, value_field, &result)); } break; case ::arrow::Type::DECIMAL128: { From 102dfb4c7fc22aea41e95ab614f65d773f7d7476 Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 31 Aug 2023 15:40:13 -0400 Subject: [PATCH 
22/37] Remove big-endian handling in column writer --- cpp/src/parquet/column_writer.cc | 45 ++------------------------------ 1 file changed, 2 insertions(+), 43 deletions(-) diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 715432d005492..a7e7b2f93e174 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -2301,21 +2301,10 @@ struct SerializeFunctor< // Write Arrow to Float16 // Requires a custom serializer because Float16s in Parquet are stored as a 2-byte -// (little-endian) FLBA, whereas in Arrow they're a native `uint16_t`. Also, a temporary -// buffer is needed if there's an endian mismatch. +// (little-endian) FLBA, whereas in Arrow they're a native `uint16_t`. template <> struct SerializeFunctor<::parquet::FLBAType, ::arrow::HalfFloatType> { - Status Serialize(const ::arrow::HalfFloatArray& array, ArrowWriteContext* ctx, - FLBA* out) { -#if ARROW_LITTLE_ENDIAN - return SerializeInPlace(array, ctx, out); -#else - return SerializeWithScratch(array, ctx, out); -#endif - } - - Status SerializeInPlace(const ::arrow::HalfFloatArray& array, ArrowWriteContext*, - FLBA* out) { + Status Serialize(const ::arrow::HalfFloatArray& array, ArrowWriteContext*, FLBA* out) { const uint16_t* values = array.raw_values(); if (array.null_count() == 0) { for (int64_t i = 0; i < array.length(); ++i) { @@ -2329,40 +2318,10 @@ struct SerializeFunctor<::parquet::FLBAType, ::arrow::HalfFloatType> { return Status::OK(); } - Status SerializeWithScratch(const ::arrow::HalfFloatArray& array, - ArrowWriteContext* ctx, FLBA* out) { - AllocateScratch(array, ctx); - if (array.null_count() == 0) { - for (int64_t i = 0; i < array.length(); ++i) { - out[i] = ToFLBA(array.Value(i)); - } - } else { - for (int64_t i = 0; i < array.length(); ++i) { - out[i] = array.IsValid(i) ? 
ToFLBA(array.Value(i)) : FLBA{}; - } - } - return Status::OK(); - } - private: FLBA ToFLBA(const uint16_t* value_ptr) const { return FLBA{reinterpret_cast(value_ptr)}; } - FLBA ToFLBA(uint16_t value) { - auto* out = reinterpret_cast(scratch_++); - Float16(value).ToLittleEndian(out); - return FLBA{out}; - } - - void AllocateScratch(const ::arrow::HalfFloatArray& array, ArrowWriteContext* ctx) { - int64_t non_null_count = array.length() - array.null_count(); - int64_t size = non_null_count * sizeof(uint16_t); - scratch_buffer_ = AllocateBuffer(ctx->memory_pool, size); - scratch_ = reinterpret_cast(scratch_buffer_->mutable_data()); - } - - std::shared_ptr scratch_buffer_; - uint16_t* scratch_; }; template <> From 2a45f290ced8179dffb9040e3ba5ad8ef6a1d3ac Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 31 Aug 2023 15:41:10 -0400 Subject: [PATCH 23/37] Tweak Arrow/Parquet schema tests --- cpp/src/parquet/arrow/arrow_schema_test.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index a1cc989ba8ea0..5443214f930d7 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -236,6 +236,8 @@ TEST_F(TestConvertParquetSchema, ParquetAnnotatedFields) { ::arrow::fixed_size_binary(12)}, {"uuid", LogicalType::UUID(), ParquetType::FIXED_LEN_BYTE_ARRAY, 16, ::arrow::fixed_size_binary(16)}, + {"float16", LogicalType::Float16(), ParquetType::FIXED_LEN_BYTE_ARRAY, 2, + ::arrow::float16()}, {"none", LogicalType::None(), ParquetType::BOOLEAN, -1, ::arrow::boolean()}, {"none", LogicalType::None(), ParquetType::INT32, -1, ::arrow::int32()}, {"none", LogicalType::None(), ParquetType::INT64, -1, ::arrow::int64()}, @@ -908,6 +910,23 @@ TEST_F(TestConvertArrowSchema, ArrowFields) { // ASSERT_NO_FATAL_FAILURE(); } +TEST_F(TestConvertArrowSchema, ArrowNonconvertibleFields) { + struct FieldConstructionArguments { + std::string name; + 
std::shared_ptr<::arrow::DataType> datatype; + }; + + std::vector cases = { + {"run_end_encoded", + ::arrow::run_end_encoded(::arrow::int32(), ::arrow::list(::arrow::int8()))}, + }; + + for (const FieldConstructionArguments& c : cases) { + auto field = ::arrow::field(c.name, c.datatype); + ASSERT_RAISES(NotImplemented, ConvertSchema({field})); + } +} + TEST_F(TestConvertArrowSchema, ParquetFlatPrimitivesAsDictionaries) { std::vector parquet_fields; std::vector> arrow_fields; From d340a827814ec5b732a90ed77291c93347e36ce6 Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 31 Aug 2023 15:42:04 -0400 Subject: [PATCH 24/37] Support `util::Float16` in `random_real` --- cpp/src/arrow/testing/random.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h index cbdac3baa0109..32ae97c11bfa2 100644 --- a/cpp/src/arrow/testing/random.h +++ b/cpp/src/arrow/testing/random.h @@ -28,6 +28,7 @@ #include "arrow/testing/uniform_real.h" #include "arrow/testing/visibility.h" #include "arrow/type.h" +#include "arrow/util/float16.h" namespace arrow { @@ -644,10 +645,20 @@ void randint(int64_t N, T lower, T upper, std::vector* out) { template void random_real(int64_t n, uint32_t seed, T min_value, T max_value, std::vector* out) { + using util::Float16; + std::default_random_engine gen(seed); - ::arrow::random::uniform_real_distribution d(min_value, max_value); - out->resize(n, static_cast(0)); - std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast(d(gen)); }); + out->resize(n, static_cast(T{0})); + if constexpr (std::is_same_v) { + ::arrow::random::uniform_real_distribution d(min_value.ToFloat(), + max_value.ToFloat()); + std::generate(out->begin(), out->end(), + [&d, &gen] { return static_cast(Float16::FromFloat(d(gen))); }); + } else { + ::arrow::random::uniform_real_distribution d(min_value, max_value); + std::generate(out->begin(), out->end(), + [&d, &gen] { return 
static_cast(d(gen)); }); + } } template From 6285af1da5be01e6b5e5abb528785d28f7e6f221 Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 31 Aug 2023 15:43:11 -0400 Subject: [PATCH 25/37] Update Arrow reader/writer tests --- .../parquet/arrow/arrow_reader_writer_test.cc | 6 +-- cpp/src/parquet/arrow/test_util.h | 38 ++++--------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 8f1c64b81322b..fb9e53870583c 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -926,11 +926,11 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) { TYPED_TEST(TestParquetIO, SingleColumnOptionalDictionaryWrite) { switch (TypeParam::type_id) { - // Skip tests for BOOL as we don't create dictionaries for it. case ::arrow::Type::BOOL: - // Skip tests for HALF_FLOAT as it's not currently supported by `dictionary_encode` + GTEST_SKIP() << "dictionaries not created for BOOL"; + break; case ::arrow::Type::HALF_FLOAT: - GTEST_SKIP(); + GTEST_SKIP() << "dictionary_encode not supported for HALF_FLOAT"; break; default: break; diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h index 74b8a36df8592..bd9b3ffe24c46 100644 --- a/cpp/src/parquet/arrow/test_util.h +++ b/cpp/src/parquet/arrow/test_util.h @@ -33,6 +33,7 @@ #include "arrow/type_fwd.h" #include "arrow/type_traits.h" #include "arrow/util/decimal.h" +#include "arrow/util/float16.h" #include "parquet/column_reader.h" namespace parquet { @@ -65,40 +66,15 @@ struct Decimal256WithPrecisionAndScale { static constexpr int32_t scale = PRECISION - 1; }; -inline std::vector RandomHalfFloatValues(size_t size, uint16_t min, - uint16_t max) { - auto to_signed = [](uint16_t in) -> int16_t { - // Clamp magnitude to exclude representations of NaN/infinity. 
Within this range, - // binary float16s have the same ordering as int16s after conversion. - int16_t out = static_cast(std::max(in & 0x7fff, 0x7bff)); - // Negate if sign bit is set - return (in & 0x8000) != 0 ? -out : out; - }; - auto to_unsigned = [](int16_t in) -> uint16_t { - uint16_t out = static_cast(std::abs(in)); - // Set sign bit if negative - return in < 0 ? (out | 0x8000) : out; - }; - - const auto signed_min = to_signed(min); - const auto signed_max = to_signed(max); - std::vector signed_values; - ::arrow::randint(size, signed_min, signed_max, &signed_values); - - std::vector values(signed_values.size()); - std::transform(signed_values.begin(), signed_values.end(), values.begin(), to_unsigned); - return values; -} - template ::arrow::enable_if_floating_point NonNullArray( size_t size, std::shared_ptr* out) { using c_type = typename ArrowType::c_type; std::vector values; if constexpr (::arrow::is_half_float_type::value) { - constexpr uint16_t min = 0x0000; // 0.0 - constexpr uint16_t max = 0x3c00; // 1.0 - values = RandomHalfFloatValues(size, min, max); + using ::arrow::util::Float16; + ::arrow::random_real(size, 0, Float16::FromFloat(0.0f), Float16::FromFloat(1.0f), + &values); } else { ::arrow::random_real(size, 0, static_cast(0), static_cast(1), &values); @@ -234,9 +210,9 @@ ::arrow::enable_if_floating_point NullableArray( using c_type = typename ArrowType::c_type; std::vector values; if constexpr (::arrow::is_half_float_type::value) { - constexpr uint16_t min = 0xf0e2; // -1e4 - constexpr uint16_t max = 0x70e2; // +1e4 - values = RandomHalfFloatValues(size, min, max); + using ::arrow::util::Float16; + ::arrow::random_real(size, seed, Float16::FromFloat(-1e4f), Float16::FromFloat(1e4f), + &values); } else { ::arrow::random_real(size, seed, static_cast(-1e10), static_cast(1e10), &values); From 554de9deeb1feb8d5590395087e9d54d6298d453 Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 31 Aug 2023 15:44:25 -0400 Subject: [PATCH 26/37] Add To/FromDouble 
methods to `Float16` --- cpp/src/arrow/util/float16.cc | 141 ++++++++++++++++++++++------------ cpp/src/arrow/util/float16.h | 11 ++- 2 files changed, 100 insertions(+), 52 deletions(-) diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc index 5bdcdfab7ec49..0e9dfd820cafb 100644 --- a/cpp/src/arrow/util/float16.cc +++ b/cpp/src/arrow/util/float16.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include "arrow/util/float16.h" #include "arrow/util/ubsan.h" @@ -40,28 +41,54 @@ namespace { // | exp bias | 15 | 127 | 1023 | // |-----------------------------------------| -// Converts a IEEE binary32 into a binary16. Rounds to nearest with ties to zero -uint16_t Binary32BitsToBinary16Bits(uint32_t f_bits) { +template +struct BinaryConverter { + static_assert(std::is_same_v || std::is_same_v); + + static constexpr int kNumBits = sizeof(T) * 8; + static constexpr int kMantNumBits = (kNumBits == 32) ? 23 : 52; + static constexpr int kExpNumBits = kNumBits - kMantNumBits - 1; + + static constexpr int kExpBias = (1 << (kExpNumBits - 1)) - 1; + + static constexpr T kMantMask = (T(1) << kMantNumBits) - 1; + static constexpr T kExpMask = ((T(1) << kExpNumBits) - 1) << kMantNumBits; + static constexpr T kSignMask = T(1) << (kNumBits - 1); + + static_assert(kMantNumBits + kExpNumBits + 1 == kNumBits); + static_assert(kSignMask + kExpMask + kMantMask == ~T(0)); + + static uint16_t ToBinary16(T); + static T FromBinary16(uint16_t); +}; + +// Converts a IEEE binary32/64 into a binary16. 
Rounds to nearest with ties to zero +template +uint16_t BinaryConverter::ToBinary16(T f_bits) { // Sign mask for output binary16 - const uint16_t h_sign = uint16_t((f_bits >> 16) & 0x8000); + const uint16_t h_sign = uint16_t((f_bits >> (kNumBits - 16)) & 0x8000); - // Exponent mask for input binary32 - const uint32_t f_exp = f_bits & 0x7f800000u; + // Exponent mask for input binary + const T f_exp = f_bits & kExpMask; // Exponents as signed pre-shifted values for convenience. Here, we need to re-bias the - // binary32 exponent for a binary16. If, after re-biasing, the binary16 exponent falls - // outside of the range [1,30] then we need to handle the under/overflow case specially. - const int16_t f_biased_exp = int16_t(f_exp >> 23); - const int16_t unbiased_exp = f_biased_exp - 127; - const int16_t h_biased_exp = unbiased_exp + 15; + // exponent for a binary16. If, after re-biasing, the binary16 exponent falls outside of + // the range [1,30] then we need to handle the under/overflow case specially. + const int32_t f_biased_exp = int32_t(f_exp >> kMantNumBits); + const int32_t unbiased_exp = f_biased_exp - kExpBias; + const int32_t h_biased_exp = unbiased_exp + 15; - // Mantissa mask for input binary32 - const uint32_t f_mant = f_bits & 0x007fffffu; + // Mantissa mask for input + const T f_mant = f_bits & kMantMask; + + // We define a "rounding bit", which is the most significant bit to be dropped + // (e.g. for a binary32, 0x1000). + constexpr T rounding_bit = T(1) << (kMantNumBits - (10 + 1)); // Handle exponent overflow, NaN, and +/-Inf if (h_biased_exp >= 0x1f) { - // The binary32 is a NaN representation + // The input is a NaN representation if (f_biased_exp == 0xff && f_mant != 0) { - uint16_t h_mant = uint16_t(f_mant >> 13); + uint16_t h_mant = uint16_t(f_mant >> (kMantNumBits - 10)); // If the mantissa bit(s) indicating NaN were shifted out, add one back. Otherwise, // the result would be infinity. 
if (h_mant == 0) { @@ -77,16 +104,16 @@ uint16_t Binary32BitsToBinary16Bits(uint32_t f_bits) { // Handle exponent underflow, subnormals, and +/-0 if (h_biased_exp <= 0) { // If the underflow exceeds the number of bits in a binary16 mantissa (10) then we - // can't round, so just clamp to 0. Note that this also weeds out any binary32 values + // can't round, so just clamp to 0. Note that this also weeds out any input values // that are subnormal - including +/-0; if (h_biased_exp < -10) { return h_sign; } // Convert to a rounded subnormal value starting with the mantissa. Since the input - // binary32 is known to be normal at this point, we need to prepend its implicit - // leading bit - which also necessitates an additional right-shift. - uint32_t rounded_mant = 0x800000u | f_mant; + // input is known to be normal at this point, we need to prepend its implicit leading + // bit - which also necessitates an additional right-shift. + T rounded_mant = (T(1) << kMantNumBits) | f_mant; rounded_mant >>= (1 - h_biased_exp); // Here, we implement rounding to nearest (with ties to even) @@ -95,41 +122,41 @@ uint16_t Binary32BitsToBinary16Bits(uint32_t f_bits) { // - The lower 13 bits, which will be shifted out // - The upper 10 bits, which will become the binary16's mantissa // - // We define a "rounding bit", which is the most significant bit to be dropped - // (0x1000). "Rounding to nearest" basically just means that we add 1 to the rounding - // bit. If it's set, then the bit will cascade upwards into the 10-bit mantissa (and - // potentially the exponent). - // - // The only time where we may NOT do this is when a "tie" occurs - i.e. when the - // rounding bit is set but all of the lower bits are 0. In that case, we don't add 1 - // if the retained mantissa is "even" (its least significant bit is 0). 
- if ((rounded_mant & 0x3fffu) != 0x1000u || (f_mant & 0x7ffu) != 0) { - rounded_mant += 0x1000u; + // "Rounding to nearest" basically just means that we add 1 to the rounding bit. If + // it's set, then the bit will cascade upwards into the 10-bit mantissa (and + // potentially the exponent). The only time where we may NOT do this is when a "tie" + // occurs - i.e. when the rounding bit is set but all of the lower bits are 0. In that + // case, we don't add 1 if the retained mantissa is "even" (its least significant bit + // is 0). + if ((rounded_mant & ((rounding_bit << 2) - 1)) != rounding_bit || + (f_mant & 0x7ffu) != 0) { + rounded_mant += rounding_bit; } - const uint16_t h_mant = uint16_t(rounded_mant >> 13); + const uint16_t h_mant = uint16_t(rounded_mant >> (kMantNumBits - 10)); return h_sign + h_mant; } const uint16_t h_exp = uint16_t(h_biased_exp) << 10; // See comment on rounding behavior above - uint32_t rounded_mant = f_mant; - if ((rounded_mant & 0x3fffu) != 0x1000u) { - rounded_mant += 0x1000u; + T rounded_mant = f_mant; + if ((rounded_mant & ((rounding_bit << 2) - 1)) != rounding_bit) { + rounded_mant += rounding_bit; } - const uint16_t h_mant = uint16_t(rounded_mant >> 13); + const uint16_t h_mant = uint16_t(rounded_mant >> (kMantNumBits - 10)); // Note that we ADD (rather than OR) the components because we want the carryover bit // from rounding the mantissa to cascade through the exponent (it shouldn't affect the // sign bit though). 
return h_sign + h_exp + h_mant; } -// Converts a IEEE binary16 into a binary32 -uint32_t Binary16BitsToBinary32Bits(uint16_t h_bits) { - // Sign mask for output binary32 - const uint32_t f_sign = uint32_t(h_bits & 0x8000u) << 16; +// Converts a IEEE binary16 into a binary32/64 +template +T BinaryConverter::FromBinary16(uint16_t h_bits) { + // Sign mask for output + const T f_sign = T(h_bits & 0x8000u) << (kNumBits - 16); // Exponent mask for input binary16 const uint16_t h_exp = h_bits & 0x7c00; @@ -139,46 +166,58 @@ uint32_t Binary16BitsToBinary32Bits(uint16_t h_bits) { switch (h_exp) { // Handle Inf and NaN case 0x7c00u: - return f_sign | 0x7f800000u | (uint32_t(h_mant) << 13); + return f_sign | kExpMask | (T(h_mant) << (kMantNumBits - 10)); // Handle zeros and subnormals case 0x0000u: { // Input is +/-0 if (h_mant == 0) { return f_sign; } - // Subnormal binary16 to normal binary32 + // Subnormal binary16 to normal binary32/64 // - // Start with an f32-biased exponent of 2^-15. We then decrement it until the most - // significant set bit is left-shifted out - as it doesn't get explicitly stored in - // normalized floating point values. Instead, its existence is implied by the new - // exponent. - uint32_t f_exp = 127 - 15; - uint32_t f_mant = uint32_t(h_mant) << 1; + // Start with an f32/64-biased exponent of 2^-15. We then decrement it until the + // most significant set bit is left-shifted out - as it doesn't get explicitly + // stored in normalized floating point values. Instead, its existence is implied by + // the new exponent. + T f_exp = kExpBias - 15; + T f_mant = T(h_mant) << 1; while ((f_mant & 0x0400u) == 0) { --f_exp; f_mant <<= 1; } - f_exp <<= 23; - f_mant = (f_mant & 0x03ffu) << 13; + f_exp <<= kMantNumBits; + f_mant = (f_mant & 0x03ffu) << (kMantNumBits - 10); return f_sign | f_exp | f_mant; } break; // Handle normals default: - // Equivalent to adding (127 - 15) to the exponent and shifting everything by 13. 
- return f_sign | ((uint32_t(h_bits & 0x7fffu) + 0x1c000u) << 13); + // Equivalent to rebiasing the exponent and shifting everything by the remaining + // mantissa bits. + return f_sign | + ((T(h_bits & 0x7fffu) + (T(kExpBias - 15) << 10)) << (kMantNumBits - 10)); } } } // namespace float Float16::ToFloat() const { - const uint32_t f_bits = Binary16BitsToBinary32Bits(value_); + const uint32_t f_bits = BinaryConverter::FromBinary16(value_); return SafeCopy(f_bits); } Float16 Float16::FromFloat(float f) { const uint32_t f_bits = SafeCopy(f); - return Float16{Binary32BitsToBinary16Bits(f_bits)}; + return Float16{BinaryConverter::ToBinary16(f_bits)}; +} + +double Float16::ToDouble() const { + const uint64_t d_bits = BinaryConverter::FromBinary16(value_); + return SafeCopy(d_bits); +} + +Float16 Float16::FromDouble(double d) { + const uint64_t d_bits = SafeCopy(d); + return Float16{BinaryConverter::ToBinary16(d_bits)}; } std::ostream& operator<<(std::ostream& os, Float16 arg) { return (os << arg.ToFloat()); } diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index 7c8597a8ec542..7ae3b31767518 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -39,12 +39,18 @@ namespace util { /// - bit 15: sign /// class ARROW_EXPORT Float16 { + constexpr static uint16_t ToBits(uint16_t bits) { return bits; } public: Float16() = default; - constexpr explicit Float16(uint16_t value) : value_(value) {} + // constexpr explicit Float16(uint16_t value) : value_(value) {} + + template >* = nullptr> + constexpr explicit Float16(T value) : value_(ToBits(value)) {} /// \brief Create a `Float16` from a 32-bit float (may lose precision) static Float16 FromFloat(float f); + /// \brief Create a `Float16` from a 64-bit float (may lose precision) + static Float16 FromDouble(double d); /// \brief Read a `Float16` from memory in native-endian byte order static Float16 FromBytes(const uint8_t* src) { @@ -66,6 +72,7 @@ class ARROW_EXPORT Float16 { 
constexpr explicit operator uint16_t() const { return bits(); } explicit operator float() const { return ToFloat(); } + explicit operator double() const { return ToDouble(); } /// \brief Return true if the value is negative (sign bit is set) constexpr bool signbit() const { return (value_ & 0x8000) != 0; } @@ -83,6 +90,8 @@ class ARROW_EXPORT Float16 { /// \brief Convert to a 32-bit float float ToFloat() const; + /// \brief Convert to a 64-bit float + double ToDouble() const; /// \brief Copy the value's bytes in native-endian byte order void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); } From 40e58f5599e6d2d1168bed0db928366c0333e221 Mon Sep 17 00:00:00 2001 From: benibus Date: Mon, 4 Sep 2023 23:07:17 -0400 Subject: [PATCH 27/37] Add tests for `double` conversions --- cpp/src/arrow/util/float16.cc | 2 +- cpp/src/arrow/util/float16_test.cc | 197 ++++++++++++++++++++--------- 2 files changed, 141 insertions(+), 58 deletions(-) diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc index 0e9dfd820cafb..9bf002734c5fa 100644 --- a/cpp/src/arrow/util/float16.cc +++ b/cpp/src/arrow/util/float16.cc @@ -87,7 +87,7 @@ uint16_t BinaryConverter::ToBinary16(T f_bits) { // Handle exponent overflow, NaN, and +/-Inf if (h_biased_exp >= 0x1f) { // The input is a NaN representation - if (f_biased_exp == 0xff && f_mant != 0) { + if (f_exp == kExpMask && f_mant != 0) { uint16_t h_mant = uint16_t(f_mant >> (kMantNumBits - 10)); // If the mantissa bit(s) indicating NaN were shifted out, add one back. Otherwise, // the result would be infinity. 
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index d69bf6954ce77..64d573e0d4aa0 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -18,32 +18,102 @@ #include #include #include -#include #include #include "arrow/testing/gtest_util.h" #include "arrow/util/endian.h" #include "arrow/util/float16.h" +#include "arrow/util/span.h" #include "arrow/util/ubsan.h" -namespace arrow { -namespace util { +namespace arrow::util { namespace { template using Limits = std::numeric_limits; float F32(uint32_t bits) { return SafeCopy(bits); } +double F64(uint64_t bits) { return SafeCopy(bits); } -TEST(Float16Test, RoundTripFromFloat32) { - struct TestCase { - float f32; - uint16_t b16; - float f16_as_f32; +Float16 ToFloat16(float f32) { return Float16::FromFloat(f32); } +Float16 ToFloat16(double f64) { return Float16::FromDouble(f64); } + +template +class Float16ConversionTest : public ::testing::Test { + public: + struct RoundTripTestCase { + T input; + uint16_t bits; + T output; }; + + static void TestRoundTrip(span test_cases) { + for (size_t index = 0; index < test_cases.size(); ++index) { + ARROW_SCOPED_TRACE("i=", index); + const auto& tc = test_cases[index]; + + const auto f16 = ToFloat16(tc.input); + EXPECT_EQ(tc.bits, f16.bits()); + EXPECT_EQ(tc.output, static_cast(f16)); + + EXPECT_EQ(std::signbit(tc.output), f16.signbit()); + EXPECT_EQ(std::isnan(tc.output), f16.is_nan()); + EXPECT_EQ(std::isinf(tc.output), f16.is_infinity()); + EXPECT_EQ(std::isfinite(tc.output), f16.is_finite()); + } + } + + static void TestRoundTripFromNaN(span test_cases) { + for (size_t i = 0; i < test_cases.size(); ++i) { + ARROW_SCOPED_TRACE("i=", i); + const auto input = test_cases[i]; + + ASSERT_TRUE(std::isnan(input)); + const bool sign = std::signbit(input); + + const Float16 f16 = ToFloat16(input); + EXPECT_TRUE(f16.is_nan()); + EXPECT_EQ(std::isinf(input), f16.is_infinity()); + EXPECT_EQ(std::isfinite(input), 
f16.is_finite()); + EXPECT_EQ(sign, f16.signbit()); + + const auto output = static_cast(f16); + EXPECT_TRUE(std::isnan(output)); + EXPECT_EQ(sign, std::signbit(output)); + } + } + + void TestRoundTripFromInf() { + const T test_cases[] = {+Limits::infinity(), -Limits::infinity()}; + + for (size_t i = 0; i < std::size(test_cases); ++i) { + ARROW_SCOPED_TRACE("i=", i); + const auto input = test_cases[i]; + + ASSERT_TRUE(std::isinf(input)); + const bool sign = std::signbit(input); + + const Float16 f16 = ToFloat16(input); + EXPECT_TRUE(f16.is_infinity()); + EXPECT_EQ(std::isfinite(input), f16.is_finite()); + EXPECT_EQ(std::isnan(input), f16.is_nan()); + EXPECT_EQ(sign, f16.signbit()); + + const auto output = static_cast(f16); + EXPECT_TRUE(std::isinf(output)); + EXPECT_EQ(sign, std::signbit(output)); + } + } + + void TestRoundTrip(); + void TestRoundTripFromNaN(); +}; + +template <> +void Float16ConversionTest::TestRoundTrip() { // Expected values were also manually validated with numpy-1.24.3 - const TestCase test_cases[] = { + const RoundTripTestCase test_cases[] = { // +/-0.0f {F32(0x80000000u), 0b1000000000000000u, -0.0f}, {F32(0x00000000u), 0b0000000000000000u, +0.0f}, @@ -71,63 +141,77 @@ TEST(Float16Test, RoundTripFromFloat32) { {F32(0x477fd001u), 0b0111101111111111u, 65504.0f}, // 32-bit exp is 127 => 2^0, rounds to 16-bit exp of 16 => 2^1. 
{F32(0xbffff000u), 0b1100000000000000u, -2.0f}, + // Extreme values should safely clamp to +/-inf + {Limits::max(), 0b0111110000000000u, +Limits::infinity()}, + {Limits::lowest(), 0b1111110000000000u, -Limits::infinity()}, }; - for (size_t index = 0; index < std::size(test_cases); ++index) { - ARROW_SCOPED_TRACE("index=", index); - const auto& tc = test_cases[index]; - const auto f16 = Float16::FromFloat(tc.f32); - EXPECT_EQ(tc.b16, f16.bits()); - EXPECT_EQ(tc.f16_as_f32, f16.ToFloat()); - - EXPECT_EQ(std::signbit(tc.f16_as_f32), f16.signbit()); - EXPECT_EQ(std::isnan(tc.f16_as_f32), f16.is_nan()); - EXPECT_EQ(std::isinf(tc.f16_as_f32), f16.is_infinity()); - EXPECT_EQ(std::isfinite(tc.f16_as_f32), f16.is_finite()); - } + TestRoundTrip(span(test_cases, std::size(test_cases))); } -TEST(Float16Test, RoundTripFromFloat32Nan) { - const float nan_test_cases[] = { - Limits::quiet_NaN(), F32(0x7f800001u), F32(0xff800001u), F32(0x7fc00000u), - F32(0xff800001u), F32(0x7fffffffu), F32(0xffffffffu)}; - - for (size_t i = 0; i < std::size(nan_test_cases); ++i) { - ARROW_SCOPED_TRACE("i=", i); - const auto f32 = nan_test_cases[i]; - - ASSERT_TRUE(std::isnan(f32)); - const bool sign = std::signbit(f32); - - const auto f16 = Float16::FromFloat(f32); - EXPECT_TRUE(f16.is_nan()); - EXPECT_EQ(sign, f16.signbit()); +template <> +void Float16ConversionTest::TestRoundTrip() { + // Expected values were also manually validated with numpy-1.24.3 + const RoundTripTestCase test_cases[] = { + // +/-0.0 + {F64(0x8000000000000000u), 0b1000000000000000u, -0.0}, + {F64(0x0000000000000000u), 0b0000000000000000u, +0.0}, + // 64-bit exp is 998 => 2^-25. Rounding to nearest. + {F64(0xbe60000000000001u), 0b1000000000000001u, -5.9604644775390625e-8}, + // 64-bit exp is 998 => 2^-25. Rounding to even. + {F64(0xbe60000000000000u), 0b1000000000000000u, -0.0}, + // 64-bit exp is 997 => 2^-26. Underflow to zero. + {F64(0xbe50000000000001u), 0b1000000000000000u, -0.0}, + // 64-bit exp is 1004 => 2^-19. 
+ {F64(0xbec3400000000000u), 0b1000000000100110u, -2.2649765014648438e-6}, + // 64-bit exp is 1004 => 2^-19. + {F64(0xbec3c00000000000u), 0b1000000000101000u, -2.3841857910156250e-6}, + // 64-bit exp is 1008 => 2^-15. Rounding to nearest. + {F64(0xbf0ff40000000001u), 0b1000001111111111u, -6.0975551605224609e-5}, + // 64-bit exp is 1008 => 2^-15. Rounds to 16-bit exp of 1 => 2^-14 + {F64(0xbf0ffc0000000001u), 0b1000010000000000u, -6.1035156250000000e-5}, + // 64-bit exp is 1038 => 2^15. Rounding to nearest. + {F64(0xc0e0020000000001u), 0b1111100000000001u, -32800.0}, + // 64-bit exp is 1038 => 2^15. Rounding to even. + {F64(0xc0e0020000000000u), 0b1111100000000000u, -32768.0}, + // 65520.0 rounds to inf + {F64(0x40effe0000000000u), 0b0111110000000000u, Limits::infinity()}, + // 65488.00000000001 rounds to 65504.0 (float16 max) + {F64(0x40effa0000000001u), 0b0111101111111111u, 65504.0}, + // 64-bit exp is 1023 => 2^0, rounds to 16-bit exp of 16 => 2^1. + {F64(0xbffffe0000000000u), 0b1100000000000000u, -2.0}, + // Extreme values should safely clamp to +/-inf + {Limits::max(), 0b0111110000000000u, +Limits::infinity()}, + {Limits::lowest(), 0b1111110000000000u, -Limits::infinity()}, + }; - const auto f16_as_f32 = f16.ToFloat(); - EXPECT_TRUE(std::isnan(f16_as_f32)); - EXPECT_EQ(sign, std::signbit(f16_as_f32)); - } + TestRoundTrip(span(test_cases, std::size(test_cases))); } -TEST(Float16Test, RoundTripFromFloat32Inf) { - const float test_cases[] = {+Limits::infinity(), -Limits::infinity()}; +template <> +void Float16ConversionTest::TestRoundTripFromNaN() { + const float test_cases[] = { + Limits::quiet_NaN(), F32(0x7f800001u), F32(0xff800001u), F32(0x7fc00000u), + F32(0xffc00000u), F32(0x7fffffffu), F32(0xffffffffu)}; + TestRoundTripFromNaN(span(test_cases, std::size(test_cases))); +} - for (size_t i = 0; i < std::size(test_cases); ++i) { - ARROW_SCOPED_TRACE("i=", i); - const auto f32 = test_cases[i]; +template <> +void Float16ConversionTest::TestRoundTripFromNaN() { + 
const double test_cases[] = {Limits::quiet_NaN(), F64(0x7ff0000000000001u), + F64(0xfff0000000000001u), F64(0x7ff8000000000000u), + F64(0xfff8000000000000u), F64(0x7fffffffffffffffu), + F64(0xffffffffffffffffu)}; + TestRoundTripFromNaN(span(test_cases, std::size(test_cases))); +} - ASSERT_TRUE(std::isinf(f32)); - const bool sign = std::signbit(f32); +using NativeFloatTypes = ::testing::Types; - const auto f16 = Float16::FromFloat(f32); - EXPECT_TRUE(f16.is_infinity()); - EXPECT_EQ(sign, f16.signbit()); +TYPED_TEST_SUITE(Float16ConversionTest, NativeFloatTypes); - const auto f16_as_f32 = f16.ToFloat(); - EXPECT_TRUE(std::isinf(f16_as_f32)); - EXPECT_EQ(sign, std::signbit(f16_as_f32)); - } -} +TYPED_TEST(Float16ConversionTest, RoundTrip) { this->TestRoundTrip(); } +TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN(); } +TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); } TEST(Float16Test, Compare) { constexpr float f32_inf = Limits::infinity(); @@ -247,5 +331,4 @@ TEST(Float16Test, FromBytes) { } } // namespace -} // namespace util -} // namespace arrow +} // namespace arrow::util From 7407ce4c267fa70420f2ee6b4cc92651bfdab5fb Mon Sep 17 00:00:00 2001 From: benibus Date: Mon, 4 Sep 2023 23:11:19 -0400 Subject: [PATCH 28/37] Change misleading types --- cpp/src/arrow/util/float16.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc index 9bf002734c5fa..bc2b8e455d3bc 100644 --- a/cpp/src/arrow/util/float16.cc +++ b/cpp/src/arrow/util/float16.cc @@ -73,9 +73,9 @@ uint16_t BinaryConverter::ToBinary16(T f_bits) { // Exponents as signed pre-shifted values for convenience. Here, we need to re-bias the // exponent for a binary16. If, after re-biasing, the binary16 exponent falls outside of // the range [1,30] then we need to handle the under/overflow case specially. 
- const int32_t f_biased_exp = int32_t(f_exp >> kMantNumBits); - const int32_t unbiased_exp = f_biased_exp - kExpBias; - const int32_t h_biased_exp = unbiased_exp + 15; + const int16_t f_biased_exp = int16_t(f_exp >> kMantNumBits); + const int16_t unbiased_exp = f_biased_exp - kExpBias; + const int16_t h_biased_exp = unbiased_exp + 15; // Mantissa mask for input const T f_mant = f_bits & kMantMask; From bb4ca6a0b3400faea73aa1a1d716e48c088987a1 Mon Sep 17 00:00:00 2001 From: benibus Date: Mon, 4 Sep 2023 23:32:48 -0400 Subject: [PATCH 29/37] Some `Float16` API changes Reverted several prior changes that were accidentally pushed Enabled construction from native floats Removed `uint16_t` conversion operator since it doesn't behave consistently with standard floats. As a result, rolled back some of the prior changes to `random_real` used in the Parquet test utils --- cpp/src/arrow/testing/random.h | 17 +++-------------- cpp/src/arrow/util/float16.h | 18 ++++++++++-------- cpp/src/arrow/util/float16_test.cc | 26 ++++++++++++++++++++------ cpp/src/parquet/arrow/test_util.h | 22 ++++++++++++++++------ 4 files changed, 49 insertions(+), 34 deletions(-) diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h index 32ae97c11bfa2..cbdac3baa0109 100644 --- a/cpp/src/arrow/testing/random.h +++ b/cpp/src/arrow/testing/random.h @@ -28,7 +28,6 @@ #include "arrow/testing/uniform_real.h" #include "arrow/testing/visibility.h" #include "arrow/type.h" -#include "arrow/util/float16.h" namespace arrow { @@ -645,20 +644,10 @@ void randint(int64_t N, T lower, T upper, std::vector* out) { template void random_real(int64_t n, uint32_t seed, T min_value, T max_value, std::vector* out) { - using util::Float16; - std::default_random_engine gen(seed); - out->resize(n, static_cast(T{0})); - if constexpr (std::is_same_v) { - ::arrow::random::uniform_real_distribution d(min_value.ToFloat(), - max_value.ToFloat()); - std::generate(out->begin(), out->end(), - [&d, &gen] { return 
static_cast(Float16::FromFloat(d(gen))); }); - } else { - ::arrow::random::uniform_real_distribution d(min_value, max_value); - std::generate(out->begin(), out->end(), - [&d, &gen] { return static_cast(d(gen)); }); - } + ::arrow::random::uniform_real_distribution d(min_value, max_value); + out->resize(n, static_cast(0)); + std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast(d(gen)); }); } template diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index 7ae3b31767518..c9e4594c0cd5d 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -25,6 +25,7 @@ #include #include "arrow/util/endian.h" +#include "arrow/util/macros.h" #include "arrow/util/ubsan.h" #include "arrow/util/visibility.h" @@ -39,18 +40,20 @@ namespace util { /// - bit 15: sign /// class ARROW_EXPORT Float16 { - constexpr static uint16_t ToBits(uint16_t bits) { return bits; } public: Float16() = default; - // constexpr explicit Float16(uint16_t value) : value_(value) {} + constexpr explicit Float16(uint16_t value) : value_(value) {} - template >* = nullptr> - constexpr explicit Float16(T value) : value_(ToBits(value)) {} + template >* = NULLPTR> + explicit Float16(T f) : Float16(FromNative(f)) {} /// \brief Create a `Float16` from a 32-bit float (may lose precision) static Float16 FromFloat(float f); /// \brief Create a `Float16` from a 64-bit float (may lose precision) static Float16 FromDouble(double d); + /// \brief Create a `Float16` from a native floating-point value (may lose precision) + static Float16 FromNative(float f) { return FromFloat(f); } + static Float16 FromNative(double d) { return FromDouble(d); } /// \brief Read a `Float16` from memory in native-endian byte order static Float16 FromBytes(const uint8_t* src) { @@ -69,10 +72,6 @@ class ARROW_EXPORT Float16 { /// \brief Return the value's integer representation constexpr uint16_t bits() const { return value_; } - constexpr explicit operator uint16_t() const { return 
bits(); } - - explicit operator float() const { return ToFloat(); } - explicit operator double() const { return ToDouble(); } /// \brief Return true if the value is negative (sign bit is set) constexpr bool signbit() const { return (value_ & 0x8000) != 0; } @@ -93,6 +92,9 @@ class ARROW_EXPORT Float16 { /// \brief Convert to a 64-bit float double ToDouble() const; + explicit operator float() const { return ToFloat(); } + explicit operator double() const { return ToDouble(); } + /// \brief Copy the value's bytes in native-endian byte order void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); } /// \brief Return the value's bytes in native-endian byte order diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index 64d573e0d4aa0..4e49532bdd6d3 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -36,9 +36,6 @@ using Limits = std::numeric_limits; float F32(uint32_t bits) { return SafeCopy(bits); } double F64(uint64_t bits) { return SafeCopy(bits); } -Float16 ToFloat16(float f32) { return Float16::FromFloat(f32); } -Float16 ToFloat16(double f64) { return Float16::FromDouble(f64); } - template class Float16ConversionTest : public ::testing::Test { public: @@ -53,7 +50,7 @@ class Float16ConversionTest : public ::testing::Test { ARROW_SCOPED_TRACE("i=", index); const auto& tc = test_cases[index]; - const auto f16 = ToFloat16(tc.input); + const auto f16 = Float16(tc.input); EXPECT_EQ(tc.bits, f16.bits()); EXPECT_EQ(tc.output, static_cast(f16)); @@ -72,7 +69,7 @@ class Float16ConversionTest : public ::testing::Test { ASSERT_TRUE(std::isnan(input)); const bool sign = std::signbit(input); - const Float16 f16 = ToFloat16(input); + const auto f16 = Float16(input); EXPECT_TRUE(f16.is_nan()); EXPECT_EQ(std::isinf(input), f16.is_infinity()); EXPECT_EQ(std::isfinite(input), f16.is_finite()); @@ -94,7 +91,7 @@ class Float16ConversionTest : public ::testing::Test { 
ASSERT_TRUE(std::isinf(input)); const bool sign = std::signbit(input); - const Float16 f16 = ToFloat16(input); + const auto f16 = Float16(input); EXPECT_TRUE(f16.is_infinity()); EXPECT_EQ(std::isfinite(input), f16.is_finite()); EXPECT_EQ(std::isnan(input), f16.is_nan()); @@ -213,6 +210,23 @@ TYPED_TEST(Float16ConversionTest, RoundTrip) { this->TestRoundTrip(); } TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN(); } TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); } +TEST(Float16Test, Constructors) { + constexpr auto from_int_0 = Float16(0); + constexpr auto from_int_1 = Float16(1); + const auto from_f32_0 = Float16(0.0f); + const auto from_f32_1 = Float16(1.0f); + const auto from_f64_0 = Float16(0.0); + const auto from_f64_1 = Float16(1.0); + + ASSERT_EQ(0, from_int_0.bits()); + ASSERT_EQ(0, from_f32_0.bits()); + ASSERT_EQ(0, from_f64_0.bits()); + + ASSERT_EQ(1, from_int_1.bits()); + ASSERT_EQ(0x3c00, from_f32_1.bits()); + ASSERT_EQ(0x3c00, from_f64_1.bits()); +} + TEST(Float16Test, Compare) { constexpr float f32_inf = Limits::infinity(); constexpr float f32_nan = Limits::quiet_NaN(); diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h index bd9b3ffe24c46..6036f47514eb1 100644 --- a/cpp/src/parquet/arrow/test_util.h +++ b/cpp/src/parquet/arrow/test_util.h @@ -66,15 +66,26 @@ struct Decimal256WithPrecisionAndScale { static constexpr int32_t scale = PRECISION - 1; }; +inline void RandomHalfFloatValues(int64_t n, uint32_t seed, + ::arrow::util::Float16 min_value, + ::arrow::util::Float16 max_value, + std::vector* out) { + std::vector values; + ::arrow::random_real(n, seed, static_cast(min_value), + static_cast(max_value), &values); + out->resize(values.size()); + std::transform(values.begin(), values.end(), out->begin(), + [](float f) { return ::arrow::util::Float16(f).bits(); }); +} + template ::arrow::enable_if_floating_point NonNullArray( size_t size, std::shared_ptr* 
out) { using c_type = typename ArrowType::c_type; std::vector values; if constexpr (::arrow::is_half_float_type::value) { - using ::arrow::util::Float16; - ::arrow::random_real(size, 0, Float16::FromFloat(0.0f), Float16::FromFloat(1.0f), - &values); + RandomHalfFloatValues(size, 0, ::arrow::util::Float16(0.0f), + ::arrow::util::Float16(1.0f), &values); } else { ::arrow::random_real(size, 0, static_cast(0), static_cast(1), &values); @@ -210,9 +221,8 @@ ::arrow::enable_if_floating_point NullableArray( using c_type = typename ArrowType::c_type; std::vector values; if constexpr (::arrow::is_half_float_type::value) { - using ::arrow::util::Float16; - ::arrow::random_real(size, seed, Float16::FromFloat(-1e4f), Float16::FromFloat(1e4f), - &values); + RandomHalfFloatValues(size, seed, ::arrow::util::Float16(-1e4f), + ::arrow::util::Float16(1e4f), &values); } else { ::arrow::random_real(size, seed, static_cast(-1e10), static_cast(1e10), &values); From 354f6f6e6eafa930579f296769b7e834978690de Mon Sep 17 00:00:00 2001 From: benibus Date: Tue, 5 Sep 2023 16:52:37 -0400 Subject: [PATCH 30/37] Refactor typed comparators --- cpp/src/parquet/statistics.cc | 39 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 73caf2b46f555..0cea53d8f73bf 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -296,7 +296,8 @@ template struct CompareHelper : public BinaryLikeCompareHelperBase {}; -struct Float16CompareHelper { +template <> +struct CompareHelper { using T = FLBA; static T DefaultMin() { return T{Float16Constants::max()}; } @@ -412,12 +413,24 @@ optional> CleanStatistic( return min_max; } -template > -class TypedComparatorImpl : virtual public TypedComparator { +template +struct RebindLogical { + using DType = T; + using c_type = typename DType::c_type; +}; + +template <> +struct RebindLogical { + using DType = FLBAType; + using c_type = 
DType::c_type; +}; + +template +class TypedComparatorImpl + : virtual public TypedComparator::DType> { public: - using T = typename DType::c_type; - using Helper = HelperType; + using T = typename RebindLogical::c_type; + using Helper = CompareHelper; explicit TypedComparatorImpl(int type_length = -1) : type_length_(type_length) {} @@ -464,7 +477,9 @@ class TypedComparatorImpl : virtual public TypedComparator { return {min, max}; } - std::pair GetMinMax(const ::arrow::Array& values) override; + std::pair GetMinMax(const ::arrow::Array& values) override { + ParquetException::NYI(values.type()->ToString()); + } private: int type_length_; @@ -492,12 +507,6 @@ TypedComparatorImpl::GetMinMax(const int32_t* va return {SafeCopy(min), SafeCopy(max)}; } -template -std::pair -TypedComparatorImpl::GetMinMax(const ::arrow::Array& values) { - ParquetException::NYI(values.type()->ToString()); -} - template std::pair GetMinMaxBinaryHelper( const TypedComparatorImpl& comparator, @@ -926,8 +935,8 @@ std::shared_ptr DoMakeComparator(Type::type physical_type, return std::make_shared>(); case Type::FIXED_LEN_BYTE_ARRAY: if (logical_type == LogicalType::Type::FLOAT16) { - return std::make_shared< - TypedComparatorImpl>(type_length); + return std::make_shared>( + type_length); } return std::make_shared>(type_length); default: From ea5f5dc031991f2c2474fb10a5cf48d7dae8c47c Mon Sep 17 00:00:00 2001 From: benibus Date: Tue, 5 Sep 2023 16:53:55 -0400 Subject: [PATCH 31/37] Add logical type to docs --- docs/source/cpp/parquet.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/cpp/parquet.rst b/docs/source/cpp/parquet.rst index 23fca8fd73010..3e06352f5dde3 100644 --- a/docs/source/cpp/parquet.rst +++ b/docs/source/cpp/parquet.rst @@ -481,6 +481,8 @@ physical type. 
+-------------------+-----------------------------+----------------------------+---------+ | MAP | Any | Map | \(6) | +-------------------+-----------------------------+----------------------------+---------+ +| FLOAT16 | FIXED_LENGTH_BYTE_ARRAY | HalfFloat | | ++-------------------+-----------------------------+----------------------------+---------+ * \(1) On the write side, the Parquet physical type INT32 is generated. From ab846302c4e2667627adfd979b9bfaca1e29ccb1 Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 19 Oct 2023 14:14:44 -0400 Subject: [PATCH 32/37] Replace public `Float16(uint16_t)` constructor --- cpp/src/arrow/util/float16.cc | 4 +- cpp/src/arrow/util/float16.h | 40 +++++++++-------- cpp/src/arrow/util/float16_test.cc | 70 ++++++++++++++---------------- cpp/src/parquet/statistics.cc | 8 ++-- 4 files changed, 59 insertions(+), 63 deletions(-) diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc index bc2b8e455d3bc..873c2e1cea534 100644 --- a/cpp/src/arrow/util/float16.cc +++ b/cpp/src/arrow/util/float16.cc @@ -207,7 +207,7 @@ float Float16::ToFloat() const { Float16 Float16::FromFloat(float f) { const uint32_t f_bits = SafeCopy(f); - return Float16{BinaryConverter::ToBinary16(f_bits)}; + return FromBits(BinaryConverter::ToBinary16(f_bits)); } double Float16::ToDouble() const { @@ -217,7 +217,7 @@ double Float16::ToDouble() const { Float16 Float16::FromDouble(double d) { const uint64_t d_bits = SafeCopy(d); - return Float16{BinaryConverter::ToBinary16(d_bits)}; + return FromBits(BinaryConverter::ToBinary16(d_bits)); } std::ostream& operator<<(std::ostream& os, Float16 arg) { return (os << arg.ToFloat()); } diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index c9e4594c0cd5d..5ba39e62e4328 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -42,32 +42,32 @@ namespace util { class ARROW_EXPORT Float16 { public: Float16() = default; - constexpr explicit Float16(uint16_t value) : 
value_(value) {} - - template >* = NULLPTR> - explicit Float16(T f) : Float16(FromNative(f)) {} - + explicit Float16(float f) : Float16(FromFloat(f)) {} + explicit Float16(double d) : Float16(FromDouble(d)) {} + template >* = NULLPTR> + explicit Float16(T v) : Float16(static_cast(v)) {} + + /// \brief Create a `Float16` from its exact binary representation + constexpr static Float16 FromBits(uint16_t bits) { return Float16{bits, bool{}}; } /// \brief Create a `Float16` from a 32-bit float (may lose precision) static Float16 FromFloat(float f); /// \brief Create a `Float16` from a 64-bit float (may lose precision) static Float16 FromDouble(double d); - /// \brief Create a `Float16` from a native floating-point value (may lose precision) - static Float16 FromNative(float f) { return FromFloat(f); } - static Float16 FromNative(double d) { return FromDouble(d); } /// \brief Read a `Float16` from memory in native-endian byte order static Float16 FromBytes(const uint8_t* src) { - return Float16(SafeLoadAs(src)); + return FromBits(SafeLoadAs(src)); } /// \brief Read a `Float16` from memory in little-endian byte order static Float16 FromLittleEndian(const uint8_t* src) { - return Float16(::arrow::bit_util::FromLittleEndian(SafeLoadAs(src))); + return FromBits(::arrow::bit_util::FromLittleEndian(SafeLoadAs(src))); } /// \brief Read a `Float16` from memory in big-endian byte order static Float16 FromBigEndian(const uint8_t* src) { - return Float16(::arrow::bit_util::FromBigEndian(SafeLoadAs(src))); + return FromBits(::arrow::bit_util::FromBigEndian(SafeLoadAs(src))); } /// \brief Return the value's integer representation @@ -108,7 +108,7 @@ class ARROW_EXPORT Float16 { /// \brief Copy the value's bytes in little-endian byte order void ToLittleEndian(uint8_t* dest) const { - Float16{::arrow::bit_util::ToLittleEndian(value_)}.ToBytes(dest); + FromBits(::arrow::bit_util::ToLittleEndian(value_)).ToBytes(dest); } /// \brief Return the value's bytes in little-endian byte order 
constexpr std::array ToLittleEndian() const { @@ -121,7 +121,7 @@ class ARROW_EXPORT Float16 { /// \brief Copy the value's bytes in big-endian byte order void ToBigEndian(uint8_t* dest) const { - Float16{::arrow::bit_util::ToBigEndian(value_)}.ToBytes(dest); + FromBits(::arrow::bit_util::ToBigEndian(value_)).ToBytes(dest); } /// \brief Return the value's bytes in big-endian byte order constexpr std::array ToBigEndian() const { @@ -132,8 +132,8 @@ class ARROW_EXPORT Float16 { #endif } - constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } - constexpr Float16 operator+() const { return Float16(value_); } + constexpr Float16 operator-() const { return FromBits(value_ ^ 0x8000); } + constexpr Float16 operator+() const { return FromBits(value_); } friend constexpr bool operator==(Float16 lhs, Float16 rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; @@ -159,6 +159,8 @@ class ARROW_EXPORT Float16 { uint16_t value_; private: + constexpr Float16(uint16_t value, bool) : value_(value) {} + // Comparison helpers that assume neither operand is NaN static constexpr bool CompareEq(Float16 lhs, Float16 rhs) { return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero()); @@ -197,11 +199,11 @@ class std::numeric_limits { static constexpr bool has_infinity = true; static constexpr bool has_quiet_NaN = true; - static constexpr T min() { return T(0b0000010000000000); } - static constexpr T max() { return T(0b0111101111111111); } + static constexpr T min() { return T::FromBits(0b0000010000000000); } + static constexpr T max() { return T::FromBits(0b0111101111111111); } static constexpr T lowest() { return -max(); } - static constexpr T infinity() { return T(0b0111110000000000); } + static constexpr T infinity() { return T::FromBits(0b0111110000000000); } - static constexpr T quiet_NaN() { return T(0b0111111111111111); } + static constexpr T quiet_NaN() { return T::FromBits(0b0111111111111111); } }; diff --git a/cpp/src/arrow/util/float16_test.cc 
b/cpp/src/arrow/util/float16_test.cc index 4e49532bdd6d3..dc8833d871baf 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -211,20 +211,14 @@ TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); } TEST(Float16Test, Constructors) { - constexpr auto from_int_0 = Float16(0); - constexpr auto from_int_1 = Float16(1); - const auto from_f32_0 = Float16(0.0f); - const auto from_f32_1 = Float16(1.0f); - const auto from_f64_0 = Float16(0.0); - const auto from_f64_1 = Float16(1.0); - - ASSERT_EQ(0, from_int_0.bits()); - ASSERT_EQ(0, from_f32_0.bits()); - ASSERT_EQ(0, from_f64_0.bits()); - - ASSERT_EQ(1, from_int_1.bits()); - ASSERT_EQ(0x3c00, from_f32_1.bits()); - ASSERT_EQ(0x3c00, from_f64_1.bits()); + // Construction from exact bits + ASSERT_EQ(1, Float16::FromBits(1).bits()); + // Construction from floating point (including implicit conversions) + int i = 0; + for (auto f16 : {Float16(1.0f), Float16(1.0), Float16(1)}) { + ARROW_SCOPED_TRACE("i=", i++); + ASSERT_EQ(0x3c00, f16.bits()); + } } TEST(Float16Test, Compare) { @@ -241,30 +235,30 @@ TEST(Float16Test, Compare) { {+Limits::infinity(), +f32_inf}, {-Limits::infinity(), -f32_inf}, // Multiple (semantically equivalent) NaN representations - {Float16(0x7e00), f32_nan}, - {Float16(0xfe00), f32_nan}, - {Float16(0x7fff), f32_nan}, - {Float16(0xffff), f32_nan}, + {Float16::FromBits(0x7e00), f32_nan}, + {Float16::FromBits(0xfe00), f32_nan}, + {Float16::FromBits(0x7fff), f32_nan}, + {Float16::FromBits(0xffff), f32_nan}, // Positive/negative zeros - {Float16(0x0000), +0.0f}, - {Float16(0x8000), -0.0f}, + {Float16::FromBits(0x0000), +0.0f}, + {Float16::FromBits(0x8000), -0.0f}, // Miscellaneous values. 
In general, they're chosen to test the sign/exponent and // exponent/mantissa boundaries - {Float16(0x101c), +0.00050163269043f}, - {Float16(0x901c), -0.00050163269043f}, - {Float16(0x101d), +0.000502109527588f}, - {Float16(0x901d), -0.000502109527588f}, - {Float16(0x121c), +0.00074577331543f}, - {Float16(0x921c), -0.00074577331543f}, - {Float16(0x141c), +0.00100326538086f}, - {Float16(0x941c), -0.00100326538086f}, - {Float16(0x501c), +32.875f}, - {Float16(0xd01c), -32.875f}, + {Float16::FromBits(0x101c), +0.00050163269043f}, + {Float16::FromBits(0x901c), -0.00050163269043f}, + {Float16::FromBits(0x101d), +0.000502109527588f}, + {Float16::FromBits(0x901d), -0.000502109527588f}, + {Float16::FromBits(0x121c), +0.00074577331543f}, + {Float16::FromBits(0x921c), -0.00074577331543f}, + {Float16::FromBits(0x141c), +0.00100326538086f}, + {Float16::FromBits(0x941c), -0.00100326538086f}, + {Float16::FromBits(0x501c), +32.875f}, + {Float16::FromBits(0xd01c), -32.875f}, // A few subnormals for good measure - {Float16(0x001c), +1.66893005371e-06f}, - {Float16(0x801c), -1.66893005371e-06f}, - {Float16(0x021c), +3.21865081787e-05f}, - {Float16(0x821c), -3.21865081787e-05f}, + {Float16::FromBits(0x001c), +1.66893005371e-06f}, + {Float16::FromBits(0x801c), -1.66893005371e-06f}, + {Float16::FromBits(0x021c), +3.21865081787e-05f}, + {Float16::FromBits(0x821c), -3.21865081787e-05f}, }; auto expect_op = [&](std::string op_name, auto op) { @@ -302,7 +296,7 @@ TEST(Float16Test, Compare) { } TEST(Float16Test, ToBytes) { - constexpr auto f16 = Float16(0xd01c); + constexpr auto f16 = Float16::FromBits(0xd01c); std::array bytes; auto load = [&bytes]() { return SafeLoadAs(bytes.data()); }; @@ -334,10 +328,10 @@ TEST(Float16Test, ToBytes) { TEST(Float16Test, FromBytes) { constexpr uint16_t u16 = 0xd01c; const auto* data = reinterpret_cast(&u16); - ASSERT_EQ(Float16::FromBytes(data), Float16(0xd01c)); + ASSERT_EQ(Float16::FromBytes(data), Float16::FromBits(0xd01c)); #if ARROW_LITTLE_ENDIAN - 
ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0xd01c)); - ASSERT_EQ(Float16::FromBigEndian(data), Float16(0x1cd0)); + ASSERT_EQ(Float16::FromLittleEndian(data), Float16::FromBits(0xd01c)); + ASSERT_EQ(Float16::FromBigEndian(data), Float16::FromBits(0x1cd0)); #else ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0x1cd0)); ASSERT_EQ(Float16::FromBigEndian(data), Float16(0xd01c)); diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 0cea53d8f73bf..37b245e0dd6c2 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -68,8 +68,8 @@ struct Float16Constants { static constexpr Bytes lowest_ = std::numeric_limits::lowest().ToLittleEndian(); static constexpr Bytes max_ = std::numeric_limits::max().ToLittleEndian(); - static constexpr Bytes positive_zero_ = (+Float16(0)).ToLittleEndian(); - static constexpr Bytes negative_zero_ = (-Float16(0)).ToLittleEndian(); + static constexpr Bytes positive_zero_ = (+Float16::FromBits(0)).ToLittleEndian(); + static constexpr Bytes negative_zero_ = (-Float16::FromBits(0)).ToLittleEndian(); }; template @@ -384,10 +384,10 @@ optional> CleanFloat16Statistic(std::pair min_ return ::std::nullopt; } - if (min == Float16(0)) { + if (min.is_zero() && !min.signbit()) { min_flba = FLBA{Float16Constants::negative_zero()}; } - if (max == -Float16(0)) { + if (max.is_zero() && max.signbit()) { max_flba = FLBA{Float16Constants::positive_zero()}; } From c8404bbafddf9237e8247161a122d3054adf9f88 Mon Sep 17 00:00:00 2001 From: benibus Date: Thu, 19 Oct 2023 15:26:15 -0400 Subject: [PATCH 33/37] `Float16` tweaks, add constexpr tests --- cpp/src/arrow/util/float16.cc | 4 ++-- cpp/src/arrow/util/float16.h | 38 ++++++++++++++---------------- cpp/src/arrow/util/float16_test.cc | 25 ++++++++++++++++++++ 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc index 873c2e1cea534..5c8b3d10ca0cd 100644 --- 
a/cpp/src/arrow/util/float16.cc +++ b/cpp/src/arrow/util/float16.cc @@ -201,7 +201,7 @@ T BinaryConverter::FromBinary16(uint16_t h_bits) { } // namespace float Float16::ToFloat() const { - const uint32_t f_bits = BinaryConverter::FromBinary16(value_); + const uint32_t f_bits = BinaryConverter::FromBinary16(bits_); return SafeCopy(f_bits); } @@ -211,7 +211,7 @@ Float16 Float16::FromFloat(float f) { } double Float16::ToDouble() const { - const uint64_t d_bits = BinaryConverter::FromBinary16(value_); + const uint64_t d_bits = BinaryConverter::FromBinary16(bits_); return SafeCopy(d_bits); } diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index 5ba39e62e4328..888936797c870 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -70,22 +70,20 @@ class ARROW_EXPORT Float16 { return FromBits(::arrow::bit_util::FromBigEndian(SafeLoadAs(src))); } - /// \brief Return the value's integer representation - constexpr uint16_t bits() const { return value_; } + /// \brief Return the value's binary representation as a `uint16_t` + constexpr uint16_t bits() const { return bits_; } /// \brief Return true if the value is negative (sign bit is set) - constexpr bool signbit() const { return (value_ & 0x8000) != 0; } + constexpr bool signbit() const { return (bits_ & 0x8000) != 0; } /// \brief Return true if the value is NaN - constexpr bool is_nan() const { - return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0; - } + constexpr bool is_nan() const { return (bits_ & 0x7fff) > 0x7c00; } /// \brief Return true if the value is positive/negative infinity - constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; } + constexpr bool is_infinity() const { return (bits_ & 0x7fff) == 0x7c00; } /// \brief Return true if the value is finite and not NaN - constexpr bool is_finite() const { return (value_ & 0x7c00) != 0x7c00; } + constexpr bool is_finite() const { return (bits_ & 0x7c00) != 0x7c00; } /// \brief Return true if the 
value is positive/negative zero - constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } + constexpr bool is_zero() const { return (bits_ & 0x7fff) == 0; } /// \brief Convert to a 32-bit float float ToFloat() const; @@ -96,7 +94,7 @@ class ARROW_EXPORT Float16 { explicit operator double() const { return ToDouble(); } /// \brief Copy the value's bytes in native-endian byte order - void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); } + void ToBytes(uint8_t* dest) const { std::memcpy(dest, &bits_, sizeof(bits_)); } /// \brief Return the value's bytes in native-endian byte order constexpr std::array ToBytes() const { #if ARROW_LITTLE_ENDIAN @@ -108,32 +106,32 @@ class ARROW_EXPORT Float16 { /// \brief Copy the value's bytes in little-endian byte order void ToLittleEndian(uint8_t* dest) const { - FromBits(::arrow::bit_util::ToLittleEndian(value_)).ToBytes(dest); + FromBits(::arrow::bit_util::ToLittleEndian(bits_)).ToBytes(dest); } /// \brief Return the value's bytes in little-endian byte order constexpr std::array ToLittleEndian() const { #if ARROW_LITTLE_ENDIAN - return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)}; + return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)}; #else - return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)}; + return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)}; #endif } /// \brief Copy the value's bytes in big-endian byte order void ToBigEndian(uint8_t* dest) const { - FromBits(::arrow::bit_util::ToBigEndian(value_)).ToBytes(dest); + FromBits(::arrow::bit_util::ToBigEndian(bits_)).ToBytes(dest); } /// \brief Return the value's bytes in big-endian byte order constexpr std::array ToBigEndian() const { #if ARROW_LITTLE_ENDIAN - return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)}; + return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)}; #else - return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)}; + return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)}; #endif } - constexpr Float16 operator-() const { return 
FromBits(value_ ^ 0x8000); } - constexpr Float16 operator+() const { return FromBits(value_); } + constexpr Float16 operator-() const { return FromBits(bits_ ^ 0x8000); } + constexpr Float16 operator+() const { return FromBits(bits_); } friend constexpr bool operator==(Float16 lhs, Float16 rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; @@ -156,10 +154,10 @@ class ARROW_EXPORT Float16 { ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg); protected: - uint16_t value_; + uint16_t bits_; private: - constexpr Float16(uint16_t value, bool) : value_(value) {} + constexpr Float16(uint16_t bits, bool) : bits_(bits) {} // Comparison helpers that assume neither operand is NaN static constexpr bool CompareEq(Float16 lhs, Float16 rhs) { diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index dc8833d871baf..073375882e3c2 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -210,6 +210,31 @@ TYPED_TEST(Float16ConversionTest, RoundTrip) { this->TestRoundTrip(); } TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN(); } TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); } +TEST(Float16Test, ConstexprFunctions) { + constexpr auto a = Float16::FromBits(0xbc00); // -1.0 + constexpr auto b = Float16::FromBits(0x3c00); // +1.0 + + static_assert(a.bits() == 0xbc00); + static_assert(a.signbit() == true); + static_assert(a.is_nan() == false); + static_assert(a.is_infinity() == false); + static_assert(a.is_finite() == true); + static_assert(a.is_zero() == false); + + static_assert((a == b) == false); + static_assert((a != b) == true); + static_assert((a < b) == true); + static_assert((a > b) == false); + static_assert((a <= b) == true); + static_assert((a >= b) == false); + static_assert(-a == +b); + + constexpr auto v = Float16::FromBits(0xffff); + static_assert(v.ToBytes()[0] == 0xff); + 
static_assert(v.ToLittleEndian()[0] == 0xff); + static_assert(v.ToBigEndian()[0] == 0xff); +} + TEST(Float16Test, Constructors) { // Construction from exact bits ASSERT_EQ(1, Float16::FromBits(1).bits()); From 157e0d7e1a07eb1a410cb7ad62069dce7f7c515a Mon Sep 17 00:00:00 2001 From: benibus Date: Fri, 20 Oct 2023 16:59:35 -0400 Subject: [PATCH 34/37] Add test for `ColumnIndex`/`BoundaryOrder` --- cpp/src/parquet/page_index_test.cc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cpp/src/parquet/page_index_test.cc b/cpp/src/parquet/page_index_test.cc index 5bfe38522af7b..4db49b4267415 100644 --- a/cpp/src/parquet/page_index_test.cc +++ b/cpp/src/parquet/page_index_test.cc @@ -21,6 +21,7 @@ #include #include "arrow/io/file.h" +#include "arrow/util/float16.h" #include "parquet/file_reader.h" #include "parquet/metadata.h" #include "parquet/schema.h" @@ -579,6 +580,27 @@ TEST(PageIndex, WriteFLBAColumnIndex) { /*has_null_counts=*/false); } +TEST(PageIndex, WriteFloat16ColumnIndex) { + using ::arrow::util::Float16; + auto encode = [](auto value) { + auto bytes = Float16(value).ToLittleEndian(); + return std::string(reinterpret_cast(bytes.data()), bytes.size()); + }; + + // Float16 (FLBA) values in the ascending order and without null count. + std::vector page_stats(4); + page_stats.at(0).set_min(encode(-1.3)).set_max(encode(+3.6)); + page_stats.at(1).set_min(encode(-0.2)).set_max(encode(+4.5)); + page_stats.at(2).set_min(encode(+1.1)).set_max(encode(+5.4)); + page_stats.at(3).set_min(encode(+2.0)).set_max(encode(+6.3)); + + auto node = schema::PrimitiveNode::Make( + "c1", Repetition::OPTIONAL, LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, + /*length=*/2); + TestWriteTypedColumnIndex(std::move(node), page_stats, BoundaryOrder::Ascending, + /*has_null_counts=*/false); +} + TEST(PageIndex, WriteColumnIndexWithAllNullPages) { // All values are null. 
std::vector page_stats(3); From 5eb90d7e9cd9338265c2d91991047fe519034f15 Mon Sep 17 00:00:00 2001 From: benibus Date: Tue, 14 Nov 2023 12:36:46 -0500 Subject: [PATCH 35/37] Tweak ToEndian methods --- cpp/src/arrow/util/float16.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index 888936797c870..0a432fee2cd31 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -106,7 +106,8 @@ class ARROW_EXPORT Float16 { /// \brief Copy the value's bytes in little-endian byte order void ToLittleEndian(uint8_t* dest) const { - FromBits(::arrow::bit_util::ToLittleEndian(bits_)).ToBytes(dest); + const auto bytes = ToLittleEndian(); + std::memcpy(dest, bytes.data(), bytes.size()); } /// \brief Return the value's bytes in little-endian byte order constexpr std::array ToLittleEndian() const { @@ -119,7 +120,8 @@ class ARROW_EXPORT Float16 { /// \brief Copy the value's bytes in big-endian byte order void ToBigEndian(uint8_t* dest) const { - FromBits(::arrow::bit_util::ToBigEndian(bits_)).ToBytes(dest); + const auto bytes = ToBigEndian(); + std::memcpy(dest, bytes.data(), bytes.size()); } /// \brief Return the value's bytes in big-endian byte order constexpr std::array ToBigEndian() const { From cb17f5675e6dd225afb7c036c2738ad655aa3773 Mon Sep 17 00:00:00 2001 From: benibus Date: Tue, 14 Nov 2023 12:38:43 -0500 Subject: [PATCH 36/37] Relocate random Float16 function --- cpp/src/parquet/arrow/test_util.h | 23 +++++++---------------- cpp/src/parquet/test_util.cc | 10 ++++++++++ cpp/src/parquet/test_util.h | 4 ++++ 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h index 6036f47514eb1..b2be1b3c5354d 100644 --- a/cpp/src/parquet/arrow/test_util.h +++ b/cpp/src/parquet/arrow/test_util.h @@ -35,6 +35,7 @@ #include "arrow/util/decimal.h" #include "arrow/util/float16.h" #include "parquet/column_reader.h" 
+#include "parquet/test_util.h" namespace parquet { @@ -66,26 +67,15 @@ struct Decimal256WithPrecisionAndScale { static constexpr int32_t scale = PRECISION - 1; }; -inline void RandomHalfFloatValues(int64_t n, uint32_t seed, - ::arrow::util::Float16 min_value, - ::arrow::util::Float16 max_value, - std::vector* out) { - std::vector values; - ::arrow::random_real(n, seed, static_cast(min_value), - static_cast(max_value), &values); - out->resize(values.size()); - std::transform(values.begin(), values.end(), out->begin(), - [](float f) { return ::arrow::util::Float16(f).bits(); }); -} - template ::arrow::enable_if_floating_point NonNullArray( size_t size, std::shared_ptr* out) { using c_type = typename ArrowType::c_type; std::vector values; if constexpr (::arrow::is_half_float_type::value) { - RandomHalfFloatValues(size, 0, ::arrow::util::Float16(0.0f), - ::arrow::util::Float16(1.0f), &values); + values.resize(size); + test::random_float16_numbers(static_cast(size), 0, ::arrow::util::Float16(0.0f), + ::arrow::util::Float16(1.0f), values.data()); } else { ::arrow::random_real(size, 0, static_cast(0), static_cast(1), &values); @@ -221,8 +211,9 @@ ::arrow::enable_if_floating_point NullableArray( using c_type = typename ArrowType::c_type; std::vector values; if constexpr (::arrow::is_half_float_type::value) { - RandomHalfFloatValues(size, seed, ::arrow::util::Float16(-1e4f), - ::arrow::util::Float16(1e4f), &values); + values.resize(size); + test::random_float16_numbers(static_cast(size), 0, ::arrow::util::Float16(-1e4f), + ::arrow::util::Float16(1e4f), values.data()); } else { ::arrow::random_real(size, seed, static_cast(-1e10), static_cast(1e10), &values); diff --git a/cpp/src/parquet/test_util.cc b/cpp/src/parquet/test_util.cc index b65945cc7329f..a6fa8afc0f5b3 100644 --- a/cpp/src/parquet/test_util.cc +++ b/cpp/src/parquet/test_util.cc @@ -101,6 +101,16 @@ void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_v } } +void 
random_float16_numbers(int n, uint32_t seed, ::arrow::util::Float16 min_value, + ::arrow::util::Float16 max_value, uint16_t* out) { + std::vector values(n); + random_numbers(n, seed, static_cast(min_value), static_cast(max_value), + values.data()); + for (int i = 0; i < n; ++i) { + out[i] = ::arrow::util::Float16(values[i]).bits(); + } +} + void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out) { std::default_random_engine gen(seed); std::uniform_int_distribution d(0, 255); diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index c8578609e9b1d..59728cf53f699 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -33,6 +33,7 @@ #include "arrow/io/memory.h" #include "arrow/testing/util.h" +#include "arrow/util/float16.h" #include "parquet/column_page.h" #include "parquet/column_reader.h" @@ -148,6 +149,9 @@ inline void random_numbers(int n, uint32_t seed, double min_value, double max_va void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value, Int96* out); +void random_float16_numbers(int n, uint32_t seed, ::arrow::util::Float16 min_value, + ::arrow::util::Float16 max_value, uint16_t* out); + void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out); void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size, From 36b8a3b88792bebb6f561f5754e8d4f48f9160c5 Mon Sep 17 00:00:00 2001 From: benibus Date: Tue, 14 Nov 2023 15:25:34 -0500 Subject: [PATCH 37/37] Add missing schema tests --- cpp/src/parquet/schema_test.cc | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index 603d9ed8e2124..a1b5557497d9c 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1147,6 +1147,9 @@ TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) { auto check_is_UUID = [](const std::shared_ptr& 
logical_type) { return logical_type->is_UUID(); }; + auto check_is_float16 = [](const std::shared_ptr& logical_type) { + return logical_type->is_float16(); + }; auto check_is_null = [](const std::shared_ptr& logical_type) { return logical_type->is_null(); }; @@ -1159,6 +1162,7 @@ TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) { std::vector cases = { {LogicalType::UUID(), check_is_UUID}, + {LogicalType::Float16(), check_is_float16}, {LogicalType::Null(), check_is_null}, {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), check_is_time}, {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), check_is_time}, @@ -1242,6 +1246,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeProperties) { {JSONLogicalType::Make(), false, true, true}, {BSONLogicalType::Make(), false, true, true}, {UUIDLogicalType::Make(), false, true, true}, + {Float16LogicalType::Make(), false, true, true}, {NoLogicalType::Make(), false, false, true}, }; @@ -1351,7 +1356,8 @@ TEST(TestLogicalTypeOperation, LogicalTypeApplicability) { int physical_length; }; - std::vector inapplicable_types = {{Type::FIXED_LEN_BYTE_ARRAY, 8}, + std::vector inapplicable_types = {{Type::FIXED_LEN_BYTE_ARRAY, 1}, + {Type::FIXED_LEN_BYTE_ARRAY, 8}, {Type::FIXED_LEN_BYTE_ARRAY, 20}, {Type::BOOLEAN, -1}, {Type::INT32, -1}, @@ -1374,6 +1380,12 @@ TEST(TestLogicalTypeOperation, LogicalTypeApplicability) { for (const InapplicableType& t : inapplicable_types) { ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length)); } + + logical_type = LogicalType::Float16(); + ASSERT_TRUE(logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 2)); + for (const InapplicableType& t : inapplicable_types) { + ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length)); + } } TEST(TestLogicalTypeOperation, DecimalLogicalTypeApplicability) { @@ -1531,6 +1543,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) { {LogicalType::JSON(), "JSON", R"({"Type": "JSON"})"}, 
{LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"}, {LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"}, + {LogicalType::Float16(), "Float16", R"({"Type": "Float16"})"}, {LogicalType::None(), "None", R"({"Type": "None"})"}, }; @@ -1580,6 +1593,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) { {LogicalType::JSON(), SortOrder::UNSIGNED}, {LogicalType::BSON(), SortOrder::UNSIGNED}, {LogicalType::UUID(), SortOrder::UNSIGNED}, + {LogicalType::Float16(), SortOrder::SIGNED}, {LogicalType::None(), SortOrder::UNKNOWN}}; for (const ExpectedSortOrder& c : cases) { @@ -1712,6 +1726,15 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) { ASSERT_ANY_THROW(PrimitiveNode::Make("uuid", Repetition::REQUIRED, UUIDLogicalType::Make(), Type::FIXED_LEN_BYTE_ARRAY, 64)); + + // Incompatible primitive type ... + ASSERT_ANY_THROW(PrimitiveNode::Make("float16", Repetition::REQUIRED, + Float16LogicalType::Make(), Type::BYTE_ARRAY, 2)); + // Incompatible primitive length ... + ASSERT_ANY_THROW(PrimitiveNode::Make("float16", Repetition::REQUIRED, + Float16LogicalType::Make(), + Type::FIXED_LEN_BYTE_ARRAY, 3)); + // Non-positive length argument for fixed length binary ... 
ASSERT_ANY_THROW(PrimitiveNode::Make("negative_length", Repetition::REQUIRED, NoLogicalType::Make(), Type::FIXED_LEN_BYTE_ARRAY, @@ -1902,6 +1925,9 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { [this]() { return element_->logicalType.__isset.BSON; }}, {"uuid", LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16, false, ConvertedType::NA, true, [this]() { return element_->logicalType.__isset.UUID; }}, + {"float16", LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2, false, + ConvertedType::NA, true, + [this]() { return element_->logicalType.__isset.FLOAT16; }}, {"none", LogicalType::None(), Type::INT64, -1, false, ConvertedType::NA, false, check_nothing}}; @@ -2238,6 +2264,7 @@ TEST(TestLogicalTypeSerialization, Roundtrips) { {LogicalType::JSON(), Type::BYTE_ARRAY, -1}, {LogicalType::BSON(), Type::BYTE_ARRAY, -1}, {LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16}, + {LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2}, {LogicalType::None(), Type::BOOLEAN, -1}}; for (const AnnotatedPrimitiveNodeFactoryArguments& c : cases) {