From 34332178542203a4f1125898c303cc30bc755f28 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Wed, 14 Jun 2023 14:48:45 -0400
Subject: [PATCH 01/37] Regenerate thrift headers

---
 cpp/src/generated/parquet_types.cpp | 2267 ++++++++++++++-------------
 cpp/src/generated/parquet_types.h   |   45 +-
 cpp/src/parquet/parquet.thrift      |    2 +
 3 files changed, 1229 insertions(+), 1085 deletions(-)

diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp
index f4e378fd3822a..86188581e0c42 100644
--- a/cpp/src/generated/parquet_types.cpp
+++ b/cpp/src/generated/parquet_types.cpp
@@ -1288,6 +1288,81 @@ void DateType::printTo(std::ostream& out) const {
 }
 
 
+Float16Type::~Float16Type() noexcept {  // Destructor with an empty body.
+}
+// Streams obj by delegating to obj.printTo(out), then returns the same stream.
+std::ostream& operator<<(std::ostream& out, const Float16Type& obj)
+{
+  obj.printTo(out);
+  return out;
+}
+
+// Reads one struct from iprot, skipping every field it encounters until T_STOP.
+uint32_t Float16Type::read(::apache::thrift::protocol::TProtocol* iprot) {
+  // Returns xfer, the running sum of the values returned by the iprot calls below.
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  // Nothing is stored on this object: each field seen before T_STOP is skipped.
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+// Writes a "Float16Type" struct to oprot and returns the summed results of the oprot calls.
+uint32_t Float16Type::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("Float16Type");
+  // No fields are written between the struct begin and the stop marker.
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+// swap overload for Float16Type: both arguments are cast to void and nothing is exchanged.
+void swap(Float16Type &a, Float16Type &b) {
+  using ::std::swap;
+  (void) a;
+  (void) b;
+}
+// Copy constructor: the source is only cast to void; no state is copied.
+Float16Type::Float16Type(const Float16Type& other28) noexcept {
+  (void) other28;
+}
+Float16Type::Float16Type(Float16Type&& other29) noexcept {
+  (void) other29;  // Move constructor: the source is unused; no state is transferred.
+}
+Float16Type& Float16Type::operator=(const Float16Type& other30) noexcept {
+  (void) other30;  // Copy assignment: the source is unused; nothing is assigned.
+  return *this;
+}
+Float16Type& Float16Type::operator=(Float16Type&& other31) noexcept {
+  (void) other31;  // Move assignment: the source is unused; nothing is assigned.
+  return *this;
+}
+void Float16Type::printTo(std::ostream& out) const {  // Writes the text "Float16Type()" to out.
+  using ::apache::thrift::to_string;
+  out << "Float16Type(";
+  out << ")";  // Nothing is printed between the parentheses.
+}
+
+
 NullType::~NullType() noexcept {
 }
 
@@ -1342,18 +1417,18 @@ void swap(NullType &a, NullType &b) {
   (void) b;
 }
 
-NullType::NullType(const NullType& other28) noexcept {
-  (void) other28;
+NullType::NullType(const NullType& other32) noexcept {
+  (void) other32;
 }
-NullType::NullType(NullType&& other29) noexcept {
-  (void) other29;
+NullType::NullType(NullType&& other33) noexcept {
+  (void) other33;
 }
-NullType& NullType::operator=(const NullType& other30) noexcept {
-  (void) other30;
+NullType& NullType::operator=(const NullType& other34) noexcept {
+  (void) other34;
   return *this;
 }
-NullType& NullType::operator=(NullType&& other31) noexcept {
-  (void) other31;
+NullType& NullType::operator=(NullType&& other35) noexcept {
+  (void) other35;
   return *this;
 }
 void NullType::printTo(std::ostream& out) const {
@@ -1460,22 +1535,22 @@ void swap(DecimalType &a, DecimalType &b) {
   swap(a.precision, b.precision);
 }
 
-DecimalType::DecimalType(const DecimalType& other32) noexcept {
-  scale = other32.scale;
-  precision = other32.precision;
+DecimalType::DecimalType(const DecimalType& other36) noexcept {
+  scale = other36.scale;
+  precision = other36.precision;
 }
-DecimalType::DecimalType(DecimalType&& other33) noexcept {
-  scale = other33.scale;
-  precision = other33.precision;
+DecimalType::DecimalType(DecimalType&& other37) noexcept {
+  scale = other37.scale;
+  precision = other37.precision;
 }
-DecimalType& DecimalType::operator=(const DecimalType& other34) noexcept {
-  scale = other34.scale;
-  precision = other34.precision;
+DecimalType& DecimalType::operator=(const DecimalType& other38) noexcept {
+  scale = other38.scale;
+  precision = other38.precision;
   return *this;
 }
-DecimalType& DecimalType::operator=(DecimalType&& other35) noexcept {
-  scale = other35.scale;
-  precision = other35.precision;
+DecimalType& DecimalType::operator=(DecimalType&& other39) noexcept {
+  scale = other39.scale;
+  precision = other39.precision;
   return *this;
 }
 void DecimalType::printTo(std::ostream& out) const {
@@ -1541,18 +1616,18 @@ void swap(MilliSeconds &a, MilliSeconds &b) {
   (void) b;
 }
 
-MilliSeconds::MilliSeconds(const MilliSeconds& other36) noexcept {
-  (void) other36;
+MilliSeconds::MilliSeconds(const MilliSeconds& other40) noexcept {
+  (void) other40;
 }
-MilliSeconds::MilliSeconds(MilliSeconds&& other37) noexcept {
-  (void) other37;
+MilliSeconds::MilliSeconds(MilliSeconds&& other41) noexcept {
+  (void) other41;
 }
-MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other38) noexcept {
-  (void) other38;
+MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other42) noexcept {
+  (void) other42;
   return *this;
 }
-MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other39) noexcept {
-  (void) other39;
+MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other43) noexcept {
+  (void) other43;
   return *this;
 }
 void MilliSeconds::printTo(std::ostream& out) const {
@@ -1616,18 +1691,18 @@ void swap(MicroSeconds &a, MicroSeconds &b) {
   (void) b;
 }
 
-MicroSeconds::MicroSeconds(const MicroSeconds& other40) noexcept {
-  (void) other40;
+MicroSeconds::MicroSeconds(const MicroSeconds& other44) noexcept {
+  (void) other44;
 }
-MicroSeconds::MicroSeconds(MicroSeconds&& other41) noexcept {
-  (void) other41;
+MicroSeconds::MicroSeconds(MicroSeconds&& other45) noexcept {
+  (void) other45;
 }
-MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other42) noexcept {
-  (void) other42;
+MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other46) noexcept {
+  (void) other46;
   return *this;
 }
-MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other43) noexcept {
-  (void) other43;
+MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other47) noexcept {
+  (void) other47;
   return *this;
 }
 void MicroSeconds::printTo(std::ostream& out) const {
@@ -1691,18 +1766,18 @@ void swap(NanoSeconds &a, NanoSeconds &b) {
   (void) b;
 }
 
-NanoSeconds::NanoSeconds(const NanoSeconds& other44) noexcept {
-  (void) other44;
+NanoSeconds::NanoSeconds(const NanoSeconds& other48) noexcept {
+  (void) other48;
 }
-NanoSeconds::NanoSeconds(NanoSeconds&& other45) noexcept {
-  (void) other45;
+NanoSeconds::NanoSeconds(NanoSeconds&& other49) noexcept {
+  (void) other49;
 }
-NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other46) noexcept {
-  (void) other46;
+NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other50) noexcept {
+  (void) other50;
   return *this;
 }
-NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other47) noexcept {
-  (void) other47;
+NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other51) noexcept {
+  (void) other51;
   return *this;
 }
 void NanoSeconds::printTo(std::ostream& out) const {
@@ -1827,30 +1902,30 @@ void swap(TimeUnit &a, TimeUnit &b) {
   swap(a.__isset, b.__isset);
 }
 
-TimeUnit::TimeUnit(const TimeUnit& other48) noexcept {
-  MILLIS = other48.MILLIS;
-  MICROS = other48.MICROS;
-  NANOS = other48.NANOS;
-  __isset = other48.__isset;
+TimeUnit::TimeUnit(const TimeUnit& other52) noexcept {
+  MILLIS = other52.MILLIS;
+  MICROS = other52.MICROS;
+  NANOS = other52.NANOS;
+  __isset = other52.__isset;
 }
-TimeUnit::TimeUnit(TimeUnit&& other49) noexcept {
-  MILLIS = std::move(other49.MILLIS);
-  MICROS = std::move(other49.MICROS);
-  NANOS = std::move(other49.NANOS);
-  __isset = other49.__isset;
+TimeUnit::TimeUnit(TimeUnit&& other53) noexcept {
+  MILLIS = std::move(other53.MILLIS);
+  MICROS = std::move(other53.MICROS);
+  NANOS = std::move(other53.NANOS);
+  __isset = other53.__isset;
 }
-TimeUnit& TimeUnit::operator=(const TimeUnit& other50) noexcept {
-  MILLIS = other50.MILLIS;
-  MICROS = other50.MICROS;
-  NANOS = other50.NANOS;
-  __isset = other50.__isset;
+TimeUnit& TimeUnit::operator=(const TimeUnit& other54) noexcept {
+  MILLIS = other54.MILLIS;
+  MICROS = other54.MICROS;
+  NANOS = other54.NANOS;
+  __isset = other54.__isset;
   return *this;
 }
-TimeUnit& TimeUnit::operator=(TimeUnit&& other51) noexcept {
-  MILLIS = std::move(other51.MILLIS);
-  MICROS = std::move(other51.MICROS);
-  NANOS = std::move(other51.NANOS);
-  __isset = other51.__isset;
+TimeUnit& TimeUnit::operator=(TimeUnit&& other55) noexcept {
+  MILLIS = std::move(other55.MILLIS);
+  MICROS = std::move(other55.MICROS);
+  NANOS = std::move(other55.NANOS);
+  __isset = other55.__isset;
   return *this;
 }
 void TimeUnit::printTo(std::ostream& out) const {
@@ -1960,22 +2035,22 @@ void swap(TimestampType &a, TimestampType &b) {
   swap(a.unit, b.unit);
 }
 
-TimestampType::TimestampType(const TimestampType& other52) noexcept {
-  isAdjustedToUTC = other52.isAdjustedToUTC;
-  unit = other52.unit;
+TimestampType::TimestampType(const TimestampType& other56) noexcept {
+  isAdjustedToUTC = other56.isAdjustedToUTC;
+  unit = other56.unit;
 }
-TimestampType::TimestampType(TimestampType&& other53) noexcept {
-  isAdjustedToUTC = other53.isAdjustedToUTC;
-  unit = std::move(other53.unit);
+TimestampType::TimestampType(TimestampType&& other57) noexcept {
+  isAdjustedToUTC = other57.isAdjustedToUTC;
+  unit = std::move(other57.unit);
 }
-TimestampType& TimestampType::operator=(const TimestampType& other54) noexcept {
-  isAdjustedToUTC = other54.isAdjustedToUTC;
-  unit = other54.unit;
+TimestampType& TimestampType::operator=(const TimestampType& other58) noexcept {
+  isAdjustedToUTC = other58.isAdjustedToUTC;
+  unit = other58.unit;
   return *this;
 }
-TimestampType& TimestampType::operator=(TimestampType&& other55) noexcept {
-  isAdjustedToUTC = other55.isAdjustedToUTC;
-  unit = std::move(other55.unit);
+TimestampType& TimestampType::operator=(TimestampType&& other59) noexcept {
+  isAdjustedToUTC = other59.isAdjustedToUTC;
+  unit = std::move(other59.unit);
   return *this;
 }
 void TimestampType::printTo(std::ostream& out) const {
@@ -2084,22 +2159,22 @@ void swap(TimeType &a, TimeType &b) {
   swap(a.unit, b.unit);
 }
 
-TimeType::TimeType(const TimeType& other56) noexcept {
-  isAdjustedToUTC = other56.isAdjustedToUTC;
-  unit = other56.unit;
+TimeType::TimeType(const TimeType& other60) noexcept {
+  isAdjustedToUTC = other60.isAdjustedToUTC;
+  unit = other60.unit;
 }
-TimeType::TimeType(TimeType&& other57) noexcept {
-  isAdjustedToUTC = other57.isAdjustedToUTC;
-  unit = std::move(other57.unit);
+TimeType::TimeType(TimeType&& other61) noexcept {
+  isAdjustedToUTC = other61.isAdjustedToUTC;
+  unit = std::move(other61.unit);
 }
-TimeType& TimeType::operator=(const TimeType& other58) noexcept {
-  isAdjustedToUTC = other58.isAdjustedToUTC;
-  unit = other58.unit;
+TimeType& TimeType::operator=(const TimeType& other62) noexcept {
+  isAdjustedToUTC = other62.isAdjustedToUTC;
+  unit = other62.unit;
   return *this;
 }
-TimeType& TimeType::operator=(TimeType&& other59) noexcept {
-  isAdjustedToUTC = other59.isAdjustedToUTC;
-  unit = std::move(other59.unit);
+TimeType& TimeType::operator=(TimeType&& other63) noexcept {
+  isAdjustedToUTC = other63.isAdjustedToUTC;
+  unit = std::move(other63.unit);
   return *this;
 }
 void TimeType::printTo(std::ostream& out) const {
@@ -2208,22 +2283,22 @@ void swap(IntType &a, IntType &b) {
   swap(a.isSigned, b.isSigned);
 }
 
-IntType::IntType(const IntType& other60) noexcept {
-  bitWidth = other60.bitWidth;
-  isSigned = other60.isSigned;
+IntType::IntType(const IntType& other64) noexcept {
+  bitWidth = other64.bitWidth;
+  isSigned = other64.isSigned;
 }
-IntType::IntType(IntType&& other61) noexcept {
-  bitWidth = other61.bitWidth;
-  isSigned = other61.isSigned;
+IntType::IntType(IntType&& other65) noexcept {
+  bitWidth = other65.bitWidth;
+  isSigned = other65.isSigned;
 }
-IntType& IntType::operator=(const IntType& other62) noexcept {
-  bitWidth = other62.bitWidth;
-  isSigned = other62.isSigned;
+IntType& IntType::operator=(const IntType& other66) noexcept {
+  bitWidth = other66.bitWidth;
+  isSigned = other66.isSigned;
   return *this;
 }
-IntType& IntType::operator=(IntType&& other63) noexcept {
-  bitWidth = other63.bitWidth;
-  isSigned = other63.isSigned;
+IntType& IntType::operator=(IntType&& other67) noexcept {
+  bitWidth = other67.bitWidth;
+  isSigned = other67.isSigned;
   return *this;
 }
 void IntType::printTo(std::ostream& out) const {
@@ -2289,18 +2364,18 @@ void swap(JsonType &a, JsonType &b) {
   (void) b;
 }
 
-JsonType::JsonType(const JsonType& other64) noexcept {
-  (void) other64;
+JsonType::JsonType(const JsonType& other68) noexcept {
+  (void) other68;
 }
-JsonType::JsonType(JsonType&& other65) noexcept {
-  (void) other65;
+JsonType::JsonType(JsonType&& other69) noexcept {
+  (void) other69;
 }
-JsonType& JsonType::operator=(const JsonType& other66) noexcept {
-  (void) other66;
+JsonType& JsonType::operator=(const JsonType& other70) noexcept {
+  (void) other70;
   return *this;
 }
-JsonType& JsonType::operator=(JsonType&& other67) noexcept {
-  (void) other67;
+JsonType& JsonType::operator=(JsonType&& other71) noexcept {
+  (void) other71;
   return *this;
 }
 void JsonType::printTo(std::ostream& out) const {
@@ -2364,18 +2439,18 @@ void swap(BsonType &a, BsonType &b) {
   (void) b;
 }
 
-BsonType::BsonType(const BsonType& other68) noexcept {
-  (void) other68;
+BsonType::BsonType(const BsonType& other72) noexcept {
+  (void) other72;
 }
-BsonType::BsonType(BsonType&& other69) noexcept {
-  (void) other69;
+BsonType::BsonType(BsonType&& other73) noexcept {
+  (void) other73;
 }
-BsonType& BsonType::operator=(const BsonType& other70) noexcept {
-  (void) other70;
+BsonType& BsonType::operator=(const BsonType& other74) noexcept {
+  (void) other74;
   return *this;
 }
-BsonType& BsonType::operator=(BsonType&& other71) noexcept {
-  (void) other71;
+BsonType& BsonType::operator=(BsonType&& other75) noexcept {
+  (void) other75;
   return *this;
 }
 void BsonType::printTo(std::ostream& out) const {
@@ -2453,6 +2528,11 @@ void LogicalType::__set_UUID(const UUIDType& val) {
   this->UUID = val;
 __isset.UUID = true;
 }
+// Assigns val to the FLOAT16 member and sets __isset.FLOAT16 to true.
+void LogicalType::__set_FLOAT16(const Float16Type& val) {
+  this->FLOAT16 = val;
+__isset.FLOAT16 = true;
+}
 std::ostream& operator<<(std::ostream& out, const LogicalType& obj)
 {
   obj.printTo(out);
@@ -2585,6 +2665,14 @@ uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) {
           xfer += iprot->skip(ftype);
         }
         break;
+      case 15:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->FLOAT16.read(iprot);
+          this->__isset.FLOAT16 = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -2667,6 +2755,11 @@ uint32_t LogicalType::write(::apache::thrift::protocol::TProtocol* oprot) const
     xfer += this->UUID.write(oprot);
     xfer += oprot->writeFieldEnd();
   }
+  if (this->__isset.FLOAT16) {
+    xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15);
+    xfer += this->FLOAT16.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -2687,73 +2780,78 @@ void swap(LogicalType &a, LogicalType &b) {
   swap(a.JSON, b.JSON);
   swap(a.BSON, b.BSON);
   swap(a.UUID, b.UUID);
+  swap(a.FLOAT16, b.FLOAT16);
   swap(a.__isset, b.__isset);
 }
 
-LogicalType::LogicalType(const LogicalType& other72) noexcept {
-  STRING = other72.STRING;
-  MAP = other72.MAP;
-  LIST = other72.LIST;
-  ENUM = other72.ENUM;
-  DECIMAL = other72.DECIMAL;
-  DATE = other72.DATE;
-  TIME = other72.TIME;
-  TIMESTAMP = other72.TIMESTAMP;
-  INTEGER = other72.INTEGER;
-  UNKNOWN = other72.UNKNOWN;
-  JSON = other72.JSON;
-  BSON = other72.BSON;
-  UUID = other72.UUID;
-  __isset = other72.__isset;
-}
-LogicalType::LogicalType(LogicalType&& other73) noexcept {
-  STRING = std::move(other73.STRING);
-  MAP = std::move(other73.MAP);
-  LIST = std::move(other73.LIST);
-  ENUM = std::move(other73.ENUM);
-  DECIMAL = std::move(other73.DECIMAL);
-  DATE = std::move(other73.DATE);
-  TIME = std::move(other73.TIME);
-  TIMESTAMP = std::move(other73.TIMESTAMP);
-  INTEGER = std::move(other73.INTEGER);
-  UNKNOWN = std::move(other73.UNKNOWN);
-  JSON = std::move(other73.JSON);
-  BSON = std::move(other73.BSON);
-  UUID = std::move(other73.UUID);
-  __isset = other73.__isset;
-}
-LogicalType& LogicalType::operator=(const LogicalType& other74) noexcept {
-  STRING = other74.STRING;
-  MAP = other74.MAP;
-  LIST = other74.LIST;
-  ENUM = other74.ENUM;
-  DECIMAL = other74.DECIMAL;
-  DATE = other74.DATE;
-  TIME = other74.TIME;
-  TIMESTAMP = other74.TIMESTAMP;
-  INTEGER = other74.INTEGER;
-  UNKNOWN = other74.UNKNOWN;
-  JSON = other74.JSON;
-  BSON = other74.BSON;
-  UUID = other74.UUID;
-  __isset = other74.__isset;
+LogicalType::LogicalType(const LogicalType& other76) noexcept {
+  STRING = other76.STRING;
+  MAP = other76.MAP;
+  LIST = other76.LIST;
+  ENUM = other76.ENUM;
+  DECIMAL = other76.DECIMAL;
+  DATE = other76.DATE;
+  TIME = other76.TIME;
+  TIMESTAMP = other76.TIMESTAMP;
+  INTEGER = other76.INTEGER;
+  UNKNOWN = other76.UNKNOWN;
+  JSON = other76.JSON;
+  BSON = other76.BSON;
+  UUID = other76.UUID;
+  FLOAT16 = other76.FLOAT16;
+  __isset = other76.__isset;
+}
+LogicalType::LogicalType(LogicalType&& other77) noexcept {
+  STRING = std::move(other77.STRING);
+  MAP = std::move(other77.MAP);
+  LIST = std::move(other77.LIST);
+  ENUM = std::move(other77.ENUM);
+  DECIMAL = std::move(other77.DECIMAL);
+  DATE = std::move(other77.DATE);
+  TIME = std::move(other77.TIME);
+  TIMESTAMP = std::move(other77.TIMESTAMP);
+  INTEGER = std::move(other77.INTEGER);
+  UNKNOWN = std::move(other77.UNKNOWN);
+  JSON = std::move(other77.JSON);
+  BSON = std::move(other77.BSON);
+  UUID = std::move(other77.UUID);
+  FLOAT16 = std::move(other77.FLOAT16);
+  __isset = other77.__isset;
+}
+LogicalType& LogicalType::operator=(const LogicalType& other78) noexcept {
+  STRING = other78.STRING;
+  MAP = other78.MAP;
+  LIST = other78.LIST;
+  ENUM = other78.ENUM;
+  DECIMAL = other78.DECIMAL;
+  DATE = other78.DATE;
+  TIME = other78.TIME;
+  TIMESTAMP = other78.TIMESTAMP;
+  INTEGER = other78.INTEGER;
+  UNKNOWN = other78.UNKNOWN;
+  JSON = other78.JSON;
+  BSON = other78.BSON;
+  UUID = other78.UUID;
+  FLOAT16 = other78.FLOAT16;
+  __isset = other78.__isset;
   return *this;
 }
-LogicalType& LogicalType::operator=(LogicalType&& other75) noexcept {
-  STRING = std::move(other75.STRING);
-  MAP = std::move(other75.MAP);
-  LIST = std::move(other75.LIST);
-  ENUM = std::move(other75.ENUM);
-  DECIMAL = std::move(other75.DECIMAL);
-  DATE = std::move(other75.DATE);
-  TIME = std::move(other75.TIME);
-  TIMESTAMP = std::move(other75.TIMESTAMP);
-  INTEGER = std::move(other75.INTEGER);
-  UNKNOWN = std::move(other75.UNKNOWN);
-  JSON = std::move(other75.JSON);
-  BSON = std::move(other75.BSON);
-  UUID = std::move(other75.UUID);
-  __isset = other75.__isset;
+LogicalType& LogicalType::operator=(LogicalType&& other79) noexcept {
+  STRING = std::move(other79.STRING);
+  MAP = std::move(other79.MAP);
+  LIST = std::move(other79.LIST);
+  ENUM = std::move(other79.ENUM);
+  DECIMAL = std::move(other79.DECIMAL);
+  DATE = std::move(other79.DATE);
+  TIME = std::move(other79.TIME);
+  TIMESTAMP = std::move(other79.TIMESTAMP);
+  INTEGER = std::move(other79.INTEGER);
+  UNKNOWN = std::move(other79.UNKNOWN);
+  JSON = std::move(other79.JSON);
+  BSON = std::move(other79.BSON);
+  UUID = std::move(other79.UUID);
+  FLOAT16 = std::move(other79.FLOAT16);
+  __isset = other79.__isset;
   return *this;
 }
 void LogicalType::printTo(std::ostream& out) const {
@@ -2772,6 +2870,7 @@ void LogicalType::printTo(std::ostream& out) const {
   out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "<null>"));
   out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "<null>"));
   out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "<null>"));
+  out << ", " << "FLOAT16="; (__isset.FLOAT16 ? (out << to_string(FLOAT16)) : (out << "<null>"));
   out << ")";
 }
 
@@ -2859,9 +2958,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
     {
       case 1:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast76;
-          xfer += iprot->readI32(ecast76);
-          this->type = static_cast<Type::type>(ecast76);
+          int32_t ecast80;
+          xfer += iprot->readI32(ecast80);
+          this->type = static_cast<Type::type>(ecast80);
           this->__isset.type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -2877,9 +2976,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 3:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast77;
-          xfer += iprot->readI32(ecast77);
-          this->repetition_type = static_cast<FieldRepetitionType::type>(ecast77);
+          int32_t ecast81;
+          xfer += iprot->readI32(ecast81);
+          this->repetition_type = static_cast<FieldRepetitionType::type>(ecast81);
           this->__isset.repetition_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -2903,9 +3002,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 6:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast78;
-          xfer += iprot->readI32(ecast78);
-          this->converted_type = static_cast<ConvertedType::type>(ecast78);
+          int32_t ecast82;
+          xfer += iprot->readI32(ecast82);
+          this->converted_type = static_cast<ConvertedType::type>(ecast82);
           this->__isset.converted_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3031,58 +3130,58 @@ void swap(SchemaElement &a, SchemaElement &b) {
   swap(a.__isset, b.__isset);
 }
 
-SchemaElement::SchemaElement(const SchemaElement& other79) {
-  type = other79.type;
-  type_length = other79.type_length;
-  repetition_type = other79.repetition_type;
-  name = other79.name;
-  num_children = other79.num_children;
-  converted_type = other79.converted_type;
-  scale = other79.scale;
-  precision = other79.precision;
-  field_id = other79.field_id;
-  logicalType = other79.logicalType;
-  __isset = other79.__isset;
-}
-SchemaElement::SchemaElement(SchemaElement&& other80) noexcept {
-  type = other80.type;
-  type_length = other80.type_length;
-  repetition_type = other80.repetition_type;
-  name = std::move(other80.name);
-  num_children = other80.num_children;
-  converted_type = other80.converted_type;
-  scale = other80.scale;
-  precision = other80.precision;
-  field_id = other80.field_id;
-  logicalType = std::move(other80.logicalType);
-  __isset = other80.__isset;
-}
-SchemaElement& SchemaElement::operator=(const SchemaElement& other81) {
-  type = other81.type;
-  type_length = other81.type_length;
-  repetition_type = other81.repetition_type;
-  name = other81.name;
-  num_children = other81.num_children;
-  converted_type = other81.converted_type;
-  scale = other81.scale;
-  precision = other81.precision;
-  field_id = other81.field_id;
-  logicalType = other81.logicalType;
-  __isset = other81.__isset;
+SchemaElement::SchemaElement(const SchemaElement& other83) {
+  type = other83.type;
+  type_length = other83.type_length;
+  repetition_type = other83.repetition_type;
+  name = other83.name;
+  num_children = other83.num_children;
+  converted_type = other83.converted_type;
+  scale = other83.scale;
+  precision = other83.precision;
+  field_id = other83.field_id;
+  logicalType = other83.logicalType;
+  __isset = other83.__isset;
+}
+SchemaElement::SchemaElement(SchemaElement&& other84) noexcept {
+  type = other84.type;
+  type_length = other84.type_length;
+  repetition_type = other84.repetition_type;
+  name = std::move(other84.name);
+  num_children = other84.num_children;
+  converted_type = other84.converted_type;
+  scale = other84.scale;
+  precision = other84.precision;
+  field_id = other84.field_id;
+  logicalType = std::move(other84.logicalType);
+  __isset = other84.__isset;
+}
+SchemaElement& SchemaElement::operator=(const SchemaElement& other85) {
+  type = other85.type;
+  type_length = other85.type_length;
+  repetition_type = other85.repetition_type;
+  name = other85.name;
+  num_children = other85.num_children;
+  converted_type = other85.converted_type;
+  scale = other85.scale;
+  precision = other85.precision;
+  field_id = other85.field_id;
+  logicalType = other85.logicalType;
+  __isset = other85.__isset;
   return *this;
 }
-SchemaElement& SchemaElement::operator=(SchemaElement&& other82) noexcept {
-  type = other82.type;
-  type_length = other82.type_length;
-  repetition_type = other82.repetition_type;
-  name = std::move(other82.name);
-  num_children = other82.num_children;
-  converted_type = other82.converted_type;
-  scale = other82.scale;
-  precision = other82.precision;
-  field_id = other82.field_id;
-  logicalType = std::move(other82.logicalType);
-  __isset = other82.__isset;
+SchemaElement& SchemaElement::operator=(SchemaElement&& other86) noexcept {
+  type = other86.type;
+  type_length = other86.type_length;
+  repetition_type = other86.repetition_type;
+  name = std::move(other86.name);
+  num_children = other86.num_children;
+  converted_type = other86.converted_type;
+  scale = other86.scale;
+  precision = other86.precision;
+  field_id = other86.field_id;
+  logicalType = std::move(other86.logicalType);
+  __isset = other86.__isset;
   return *this;
 }
 void SchemaElement::printTo(std::ostream& out) const {
@@ -3168,9 +3267,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 2:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast83;
-          xfer += iprot->readI32(ecast83);
-          this->encoding = static_cast<Encoding::type>(ecast83);
+          int32_t ecast87;
+          xfer += iprot->readI32(ecast87);
+          this->encoding = static_cast<Encoding::type>(ecast87);
           isset_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3178,9 +3277,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 3:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast84;
-          xfer += iprot->readI32(ecast84);
-          this->definition_level_encoding = static_cast<Encoding::type>(ecast84);
+          int32_t ecast88;
+          xfer += iprot->readI32(ecast88);
+          this->definition_level_encoding = static_cast<Encoding::type>(ecast88);
           isset_definition_level_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3188,9 +3287,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 4:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast85;
-          xfer += iprot->readI32(ecast85);
-          this->repetition_level_encoding = static_cast<Encoding::type>(ecast85);
+          int32_t ecast89;
+          xfer += iprot->readI32(ecast89);
+          this->repetition_level_encoding = static_cast<Encoding::type>(ecast89);
           isset_repetition_level_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3265,38 +3364,38 @@ void swap(DataPageHeader &a, DataPageHeader &b) {
   swap(a.__isset, b.__isset);
 }
 
-DataPageHeader::DataPageHeader(const DataPageHeader& other86) {
-  num_values = other86.num_values;
-  encoding = other86.encoding;
-  definition_level_encoding = other86.definition_level_encoding;
-  repetition_level_encoding = other86.repetition_level_encoding;
-  statistics = other86.statistics;
-  __isset = other86.__isset;
-}
-DataPageHeader::DataPageHeader(DataPageHeader&& other87) noexcept {
-  num_values = other87.num_values;
-  encoding = other87.encoding;
-  definition_level_encoding = other87.definition_level_encoding;
-  repetition_level_encoding = other87.repetition_level_encoding;
-  statistics = std::move(other87.statistics);
-  __isset = other87.__isset;
-}
-DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other88) {
-  num_values = other88.num_values;
-  encoding = other88.encoding;
-  definition_level_encoding = other88.definition_level_encoding;
-  repetition_level_encoding = other88.repetition_level_encoding;
-  statistics = other88.statistics;
-  __isset = other88.__isset;
+DataPageHeader::DataPageHeader(const DataPageHeader& other90) {
+  num_values = other90.num_values;
+  encoding = other90.encoding;
+  definition_level_encoding = other90.definition_level_encoding;
+  repetition_level_encoding = other90.repetition_level_encoding;
+  statistics = other90.statistics;
+  __isset = other90.__isset;
+}
+DataPageHeader::DataPageHeader(DataPageHeader&& other91) noexcept {
+  num_values = other91.num_values;
+  encoding = other91.encoding;
+  definition_level_encoding = other91.definition_level_encoding;
+  repetition_level_encoding = other91.repetition_level_encoding;
+  statistics = std::move(other91.statistics);
+  __isset = other91.__isset;
+}
+DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other92) {
+  num_values = other92.num_values;
+  encoding = other92.encoding;
+  definition_level_encoding = other92.definition_level_encoding;
+  repetition_level_encoding = other92.repetition_level_encoding;
+  statistics = other92.statistics;
+  __isset = other92.__isset;
   return *this;
 }
-DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other89) noexcept {
-  num_values = other89.num_values;
-  encoding = other89.encoding;
-  definition_level_encoding = other89.definition_level_encoding;
-  repetition_level_encoding = other89.repetition_level_encoding;
-  statistics = std::move(other89.statistics);
-  __isset = other89.__isset;
+DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other93) noexcept {
+  num_values = other93.num_values;
+  encoding = other93.encoding;
+  definition_level_encoding = other93.definition_level_encoding;
+  repetition_level_encoding = other93.repetition_level_encoding;
+  statistics = std::move(other93.statistics);
+  __isset = other93.__isset;
   return *this;
 }
 void DataPageHeader::printTo(std::ostream& out) const {
@@ -3365,18 +3464,18 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) {
   (void) b;
 }
 
-IndexPageHeader::IndexPageHeader(const IndexPageHeader& other90) noexcept {
-  (void) other90;
+IndexPageHeader::IndexPageHeader(const IndexPageHeader& other94) noexcept {
+  (void) other94;
 }
-IndexPageHeader::IndexPageHeader(IndexPageHeader&& other91) noexcept {
-  (void) other91;
+IndexPageHeader::IndexPageHeader(IndexPageHeader&& other95) noexcept {
+  (void) other95;
 }
-IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other92) noexcept {
-  (void) other92;
+IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other96) noexcept {
+  (void) other96;
   return *this;
 }
-IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other93) noexcept {
-  (void) other93;
+IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other97) noexcept {
+  (void) other97;
   return *this;
 }
 void IndexPageHeader::printTo(std::ostream& out) const {
@@ -3442,9 +3541,9 @@ uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot
         break;
       case 2:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast94;
-          xfer += iprot->readI32(ecast94);
-          this->encoding = static_cast<Encoding::type>(ecast94);
+          int32_t ecast98;
+          xfer += iprot->readI32(ecast98);
+          this->encoding = static_cast<Encoding::type>(ecast98);
           isset_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3505,30 +3604,30 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) {
   swap(a.__isset, b.__isset);
 }
 
-DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other95) noexcept {
-  num_values = other95.num_values;
-  encoding = other95.encoding;
-  is_sorted = other95.is_sorted;
-  __isset = other95.__isset;
+DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other99) noexcept {
+  num_values = other99.num_values;
+  encoding = other99.encoding;
+  is_sorted = other99.is_sorted;
+  __isset = other99.__isset;
 }
-DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other96) noexcept {
-  num_values = other96.num_values;
-  encoding = other96.encoding;
-  is_sorted = other96.is_sorted;
-  __isset = other96.__isset;
+DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other100) noexcept {
+  num_values = other100.num_values;
+  encoding = other100.encoding;
+  is_sorted = other100.is_sorted;
+  __isset = other100.__isset;
 }
-DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other97) noexcept {
-  num_values = other97.num_values;
-  encoding = other97.encoding;
-  is_sorted = other97.is_sorted;
-  __isset = other97.__isset;
+DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other101) noexcept {
+  num_values = other101.num_values;
+  encoding = other101.encoding;
+  is_sorted = other101.is_sorted;
+  __isset = other101.__isset;
   return *this;
 }
-DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other98) noexcept {
-  num_values = other98.num_values;
-  encoding = other98.encoding;
-  is_sorted = other98.is_sorted;
-  __isset = other98.__isset;
+DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other102) noexcept {
+  num_values = other102.num_values;
+  encoding = other102.encoding;
+  is_sorted = other102.is_sorted;
+  __isset = other102.__isset;
   return *this;
 }
 void DictionaryPageHeader::printTo(std::ostream& out) const {
@@ -3638,9 +3737,9 @@ uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 4:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast99;
-          xfer += iprot->readI32(ecast99);
-          this->encoding = static_cast<Encoding::type>(ecast99);
+          int32_t ecast103;
+          xfer += iprot->readI32(ecast103);
+          this->encoding = static_cast<Encoding::type>(ecast103);
           isset_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3759,50 +3858,50 @@ void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) {
   swap(a.__isset, b.__isset);
 }
 
-DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other100) {
-  num_values = other100.num_values;
-  num_nulls = other100.num_nulls;
-  num_rows = other100.num_rows;
-  encoding = other100.encoding;
-  definition_levels_byte_length = other100.definition_levels_byte_length;
-  repetition_levels_byte_length = other100.repetition_levels_byte_length;
-  is_compressed = other100.is_compressed;
-  statistics = other100.statistics;
-  __isset = other100.__isset;
-}
-DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other101) noexcept {
-  num_values = other101.num_values;
-  num_nulls = other101.num_nulls;
-  num_rows = other101.num_rows;
-  encoding = other101.encoding;
-  definition_levels_byte_length = other101.definition_levels_byte_length;
-  repetition_levels_byte_length = other101.repetition_levels_byte_length;
-  is_compressed = other101.is_compressed;
-  statistics = std::move(other101.statistics);
-  __isset = other101.__isset;
-}
-DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other102) {
-  num_values = other102.num_values;
-  num_nulls = other102.num_nulls;
-  num_rows = other102.num_rows;
-  encoding = other102.encoding;
-  definition_levels_byte_length = other102.definition_levels_byte_length;
-  repetition_levels_byte_length = other102.repetition_levels_byte_length;
-  is_compressed = other102.is_compressed;
-  statistics = other102.statistics;
-  __isset = other102.__isset;
+DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other104) {
+  num_values = other104.num_values;
+  num_nulls = other104.num_nulls;
+  num_rows = other104.num_rows;
+  encoding = other104.encoding;
+  definition_levels_byte_length = other104.definition_levels_byte_length;
+  repetition_levels_byte_length = other104.repetition_levels_byte_length;
+  is_compressed = other104.is_compressed;
+  statistics = other104.statistics;
+  __isset = other104.__isset;
+}
+DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other105) noexcept {
+  num_values = other105.num_values;
+  num_nulls = other105.num_nulls;
+  num_rows = other105.num_rows;
+  encoding = other105.encoding;
+  definition_levels_byte_length = other105.definition_levels_byte_length;
+  repetition_levels_byte_length = other105.repetition_levels_byte_length;
+  is_compressed = other105.is_compressed;
+  statistics = std::move(other105.statistics);
+  __isset = other105.__isset;
+}
+DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other106) {
+  num_values = other106.num_values;
+  num_nulls = other106.num_nulls;
+  num_rows = other106.num_rows;
+  encoding = other106.encoding;
+  definition_levels_byte_length = other106.definition_levels_byte_length;
+  repetition_levels_byte_length = other106.repetition_levels_byte_length;
+  is_compressed = other106.is_compressed;
+  statistics = other106.statistics;
+  __isset = other106.__isset;
   return *this;
 }
-DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other103) noexcept {
-  num_values = other103.num_values;
-  num_nulls = other103.num_nulls;
-  num_rows = other103.num_rows;
-  encoding = other103.encoding;
-  definition_levels_byte_length = other103.definition_levels_byte_length;
-  repetition_levels_byte_length = other103.repetition_levels_byte_length;
-  is_compressed = other103.is_compressed;
-  statistics = std::move(other103.statistics);
-  __isset = other103.__isset;
+DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other107) noexcept {
+  num_values = other107.num_values;
+  num_nulls = other107.num_nulls;
+  num_rows = other107.num_rows;
+  encoding = other107.encoding;
+  definition_levels_byte_length = other107.definition_levels_byte_length;
+  repetition_levels_byte_length = other107.repetition_levels_byte_length;
+  is_compressed = other107.is_compressed;
+  statistics = std::move(other107.statistics);
+  __isset = other107.__isset;
   return *this;
 }
 void DataPageHeaderV2::printTo(std::ostream& out) const {
@@ -3874,18 +3973,18 @@ void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) {
   (void) b;
 }
 
-SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other104) noexcept {
-  (void) other104;
+SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other108) noexcept {
+  (void) other108;
 }
-SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other105) noexcept {
-  (void) other105;
+SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other109) noexcept {
+  (void) other109;
 }
-SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other106) noexcept {
-  (void) other106;
+SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other110) noexcept {
+  (void) other110;
   return *this;
 }
-SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other107) noexcept {
-  (void) other107;
+SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other111) noexcept {
+  (void) other111;
   return *this;
 }
 void SplitBlockAlgorithm::printTo(std::ostream& out) const {
@@ -3972,22 +4071,22 @@ void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) {
   swap(a.__isset, b.__isset);
 }
 
-BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other108) noexcept {
-  BLOCK = other108.BLOCK;
-  __isset = other108.__isset;
+BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other112) noexcept {
+  BLOCK = other112.BLOCK;
+  __isset = other112.__isset;
 }
-BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other109) noexcept {
-  BLOCK = std::move(other109.BLOCK);
-  __isset = other109.__isset;
+BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other113) noexcept {
+  BLOCK = std::move(other113.BLOCK);
+  __isset = other113.__isset;
 }
-BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other110) noexcept {
-  BLOCK = other110.BLOCK;
-  __isset = other110.__isset;
+BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other114) noexcept {
+  BLOCK = other114.BLOCK;
+  __isset = other114.__isset;
   return *this;
 }
-BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other111) noexcept {
-  BLOCK = std::move(other111.BLOCK);
-  __isset = other111.__isset;
+BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other115) noexcept {
+  BLOCK = std::move(other115.BLOCK);
+  __isset = other115.__isset;
   return *this;
 }
 void BloomFilterAlgorithm::printTo(std::ostream& out) const {
@@ -4052,18 +4151,18 @@ void swap(XxHash &a, XxHash &b) {
   (void) b;
 }
 
-XxHash::XxHash(const XxHash& other112) noexcept {
-  (void) other112;
+XxHash::XxHash(const XxHash& other116) noexcept {
+  (void) other116;
 }
-XxHash::XxHash(XxHash&& other113) noexcept {
-  (void) other113;
+XxHash::XxHash(XxHash&& other117) noexcept {
+  (void) other117;
 }
-XxHash& XxHash::operator=(const XxHash& other114) noexcept {
-  (void) other114;
+XxHash& XxHash::operator=(const XxHash& other118) noexcept {
+  (void) other118;
   return *this;
 }
-XxHash& XxHash::operator=(XxHash&& other115) noexcept {
-  (void) other115;
+XxHash& XxHash::operator=(XxHash&& other119) noexcept {
+  (void) other119;
   return *this;
 }
 void XxHash::printTo(std::ostream& out) const {
@@ -4150,22 +4249,22 @@ void swap(BloomFilterHash &a, BloomFilterHash &b) {
   swap(a.__isset, b.__isset);
 }
 
-BloomFilterHash::BloomFilterHash(const BloomFilterHash& other116) noexcept {
-  XXHASH = other116.XXHASH;
-  __isset = other116.__isset;
+BloomFilterHash::BloomFilterHash(const BloomFilterHash& other120) noexcept {
+  XXHASH = other120.XXHASH;
+  __isset = other120.__isset;
 }
-BloomFilterHash::BloomFilterHash(BloomFilterHash&& other117) noexcept {
-  XXHASH = std::move(other117.XXHASH);
-  __isset = other117.__isset;
+BloomFilterHash::BloomFilterHash(BloomFilterHash&& other121) noexcept {
+  XXHASH = std::move(other121.XXHASH);
+  __isset = other121.__isset;
 }
-BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other118) noexcept {
-  XXHASH = other118.XXHASH;
-  __isset = other118.__isset;
+BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other122) noexcept {
+  XXHASH = other122.XXHASH;
+  __isset = other122.__isset;
   return *this;
 }
-BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other119) noexcept {
-  XXHASH = std::move(other119.XXHASH);
-  __isset = other119.__isset;
+BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other123) noexcept {
+  XXHASH = std::move(other123.XXHASH);
+  __isset = other123.__isset;
   return *this;
 }
 void BloomFilterHash::printTo(std::ostream& out) const {
@@ -4230,18 +4329,18 @@ void swap(Uncompressed &a, Uncompressed &b) {
   (void) b;
 }
 
-Uncompressed::Uncompressed(const Uncompressed& other120) noexcept {
-  (void) other120;
+Uncompressed::Uncompressed(const Uncompressed& other124) noexcept {
+  (void) other124;
 }
-Uncompressed::Uncompressed(Uncompressed&& other121) noexcept {
-  (void) other121;
+Uncompressed::Uncompressed(Uncompressed&& other125) noexcept {
+  (void) other125;
 }
-Uncompressed& Uncompressed::operator=(const Uncompressed& other122) noexcept {
-  (void) other122;
+Uncompressed& Uncompressed::operator=(const Uncompressed& other126) noexcept {
+  (void) other126;
   return *this;
 }
-Uncompressed& Uncompressed::operator=(Uncompressed&& other123) noexcept {
-  (void) other123;
+Uncompressed& Uncompressed::operator=(Uncompressed&& other127) noexcept {
+  (void) other127;
   return *this;
 }
 void Uncompressed::printTo(std::ostream& out) const {
@@ -4328,22 +4427,22 @@ void swap(BloomFilterCompression &a, BloomFilterCompression &b) {
   swap(a.__isset, b.__isset);
 }
 
-BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other124) noexcept {
-  UNCOMPRESSED = other124.UNCOMPRESSED;
-  __isset = other124.__isset;
+BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other128) noexcept {
+  UNCOMPRESSED = other128.UNCOMPRESSED;
+  __isset = other128.__isset;
 }
-BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other125) noexcept {
-  UNCOMPRESSED = std::move(other125.UNCOMPRESSED);
-  __isset = other125.__isset;
+BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other129) noexcept {
+  UNCOMPRESSED = std::move(other129.UNCOMPRESSED);
+  __isset = other129.__isset;
 }
-BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other126) noexcept {
-  UNCOMPRESSED = other126.UNCOMPRESSED;
-  __isset = other126.__isset;
+BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other130) noexcept {
+  UNCOMPRESSED = other130.UNCOMPRESSED;
+  __isset = other130.__isset;
   return *this;
 }
-BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other127) noexcept {
-  UNCOMPRESSED = std::move(other127.UNCOMPRESSED);
-  __isset = other127.__isset;
+BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other131) noexcept {
+  UNCOMPRESSED = std::move(other131.UNCOMPRESSED);
+  __isset = other131.__isset;
   return *this;
 }
 void BloomFilterCompression::printTo(std::ostream& out) const {
@@ -4491,30 +4590,30 @@ void swap(BloomFilterHeader &a, BloomFilterHeader &b) {
   swap(a.compression, b.compression);
 }
 
-BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other128) noexcept {
-  numBytes = other128.numBytes;
-  algorithm = other128.algorithm;
-  hash = other128.hash;
-  compression = other128.compression;
+BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other132) noexcept {
+  numBytes = other132.numBytes;
+  algorithm = other132.algorithm;
+  hash = other132.hash;
+  compression = other132.compression;
 }
-BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other129) noexcept {
-  numBytes = other129.numBytes;
-  algorithm = std::move(other129.algorithm);
-  hash = std::move(other129.hash);
-  compression = std::move(other129.compression);
+BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other133) noexcept {
+  numBytes = other133.numBytes;
+  algorithm = std::move(other133.algorithm);
+  hash = std::move(other133.hash);
+  compression = std::move(other133.compression);
 }
-BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other130) noexcept {
-  numBytes = other130.numBytes;
-  algorithm = other130.algorithm;
-  hash = other130.hash;
-  compression = other130.compression;
+BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other134) noexcept {
+  numBytes = other134.numBytes;
+  algorithm = other134.algorithm;
+  hash = other134.hash;
+  compression = other134.compression;
   return *this;
 }
-BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other131) noexcept {
-  numBytes = other131.numBytes;
-  algorithm = std::move(other131.algorithm);
-  hash = std::move(other131.hash);
-  compression = std::move(other131.compression);
+BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other135) noexcept {
+  numBytes = other135.numBytes;
+  algorithm = std::move(other135.algorithm);
+  hash = std::move(other135.hash);
+  compression = std::move(other135.compression);
   return *this;
 }
 void BloomFilterHeader::printTo(std::ostream& out) const {
@@ -4601,9 +4700,9 @@ uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
     {
       case 1:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast132;
-          xfer += iprot->readI32(ecast132);
-          this->type = static_cast<PageType::type>(ecast132);
+          int32_t ecast136;
+          xfer += iprot->readI32(ecast136);
+          this->type = static_cast<PageType::type>(ecast136);
           isset_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -4743,50 +4842,50 @@ void swap(PageHeader &a, PageHeader &b) {
   swap(a.__isset, b.__isset);
 }
 
-PageHeader::PageHeader(const PageHeader& other133) {
-  type = other133.type;
-  uncompressed_page_size = other133.uncompressed_page_size;
-  compressed_page_size = other133.compressed_page_size;
-  crc = other133.crc;
-  data_page_header = other133.data_page_header;
-  index_page_header = other133.index_page_header;
-  dictionary_page_header = other133.dictionary_page_header;
-  data_page_header_v2 = other133.data_page_header_v2;
-  __isset = other133.__isset;
-}
-PageHeader::PageHeader(PageHeader&& other134) noexcept {
-  type = other134.type;
-  uncompressed_page_size = other134.uncompressed_page_size;
-  compressed_page_size = other134.compressed_page_size;
-  crc = other134.crc;
-  data_page_header = std::move(other134.data_page_header);
-  index_page_header = std::move(other134.index_page_header);
-  dictionary_page_header = std::move(other134.dictionary_page_header);
-  data_page_header_v2 = std::move(other134.data_page_header_v2);
-  __isset = other134.__isset;
-}
-PageHeader& PageHeader::operator=(const PageHeader& other135) {
-  type = other135.type;
-  uncompressed_page_size = other135.uncompressed_page_size;
-  compressed_page_size = other135.compressed_page_size;
-  crc = other135.crc;
-  data_page_header = other135.data_page_header;
-  index_page_header = other135.index_page_header;
-  dictionary_page_header = other135.dictionary_page_header;
-  data_page_header_v2 = other135.data_page_header_v2;
-  __isset = other135.__isset;
+PageHeader::PageHeader(const PageHeader& other137) {
+  type = other137.type;
+  uncompressed_page_size = other137.uncompressed_page_size;
+  compressed_page_size = other137.compressed_page_size;
+  crc = other137.crc;
+  data_page_header = other137.data_page_header;
+  index_page_header = other137.index_page_header;
+  dictionary_page_header = other137.dictionary_page_header;
+  data_page_header_v2 = other137.data_page_header_v2;
+  __isset = other137.__isset;
+}
+PageHeader::PageHeader(PageHeader&& other138) noexcept {
+  type = other138.type;
+  uncompressed_page_size = other138.uncompressed_page_size;
+  compressed_page_size = other138.compressed_page_size;
+  crc = other138.crc;
+  data_page_header = std::move(other138.data_page_header);
+  index_page_header = std::move(other138.index_page_header);
+  dictionary_page_header = std::move(other138.dictionary_page_header);
+  data_page_header_v2 = std::move(other138.data_page_header_v2);
+  __isset = other138.__isset;
+}
+PageHeader& PageHeader::operator=(const PageHeader& other139) {
+  type = other139.type;
+  uncompressed_page_size = other139.uncompressed_page_size;
+  compressed_page_size = other139.compressed_page_size;
+  crc = other139.crc;
+  data_page_header = other139.data_page_header;
+  index_page_header = other139.index_page_header;
+  dictionary_page_header = other139.dictionary_page_header;
+  data_page_header_v2 = other139.data_page_header_v2;
+  __isset = other139.__isset;
   return *this;
 }
-PageHeader& PageHeader::operator=(PageHeader&& other136) noexcept {
-  type = other136.type;
-  uncompressed_page_size = other136.uncompressed_page_size;
-  compressed_page_size = other136.compressed_page_size;
-  crc = other136.crc;
-  data_page_header = std::move(other136.data_page_header);
-  index_page_header = std::move(other136.index_page_header);
-  dictionary_page_header = std::move(other136.dictionary_page_header);
-  data_page_header_v2 = std::move(other136.data_page_header_v2);
-  __isset = other136.__isset;
+PageHeader& PageHeader::operator=(PageHeader&& other140) noexcept {
+  type = other140.type;
+  uncompressed_page_size = other140.uncompressed_page_size;
+  compressed_page_size = other140.compressed_page_size;
+  crc = other140.crc;
+  data_page_header = std::move(other140.data_page_header);
+  index_page_header = std::move(other140.index_page_header);
+  dictionary_page_header = std::move(other140.dictionary_page_header);
+  data_page_header_v2 = std::move(other140.data_page_header_v2);
+  __isset = other140.__isset;
   return *this;
 }
 void PageHeader::printTo(std::ostream& out) const {
@@ -4901,26 +5000,26 @@ void swap(KeyValue &a, KeyValue &b) {
   swap(a.__isset, b.__isset);
 }
 
-KeyValue::KeyValue(const KeyValue& other137) {
-  key = other137.key;
-  value = other137.value;
-  __isset = other137.__isset;
+KeyValue::KeyValue(const KeyValue& other141) {
+  key = other141.key;
+  value = other141.value;
+  __isset = other141.__isset;
 }
-KeyValue::KeyValue(KeyValue&& other138) noexcept {
-  key = std::move(other138.key);
-  value = std::move(other138.value);
-  __isset = other138.__isset;
+KeyValue::KeyValue(KeyValue&& other142) noexcept {
+  key = std::move(other142.key);
+  value = std::move(other142.value);
+  __isset = other142.__isset;
 }
-KeyValue& KeyValue::operator=(const KeyValue& other139) {
-  key = other139.key;
-  value = other139.value;
-  __isset = other139.__isset;
+KeyValue& KeyValue::operator=(const KeyValue& other143) {
+  key = other143.key;
+  value = other143.value;
+  __isset = other143.__isset;
   return *this;
 }
-KeyValue& KeyValue::operator=(KeyValue&& other140) noexcept {
-  key = std::move(other140.key);
-  value = std::move(other140.value);
-  __isset = other140.__isset;
+KeyValue& KeyValue::operator=(KeyValue&& other144) noexcept {
+  key = std::move(other144.key);
+  value = std::move(other144.value);
+  __isset = other144.__isset;
   return *this;
 }
 void KeyValue::printTo(std::ostream& out) const {
@@ -5049,26 +5148,26 @@ void swap(SortingColumn &a, SortingColumn &b) {
   swap(a.nulls_first, b.nulls_first);
 }
 
-SortingColumn::SortingColumn(const SortingColumn& other141) noexcept {
-  column_idx = other141.column_idx;
-  descending = other141.descending;
-  nulls_first = other141.nulls_first;
+SortingColumn::SortingColumn(const SortingColumn& other145) noexcept {
+  column_idx = other145.column_idx;
+  descending = other145.descending;
+  nulls_first = other145.nulls_first;
 }
-SortingColumn::SortingColumn(SortingColumn&& other142) noexcept {
-  column_idx = other142.column_idx;
-  descending = other142.descending;
-  nulls_first = other142.nulls_first;
+SortingColumn::SortingColumn(SortingColumn&& other146) noexcept {
+  column_idx = other146.column_idx;
+  descending = other146.descending;
+  nulls_first = other146.nulls_first;
 }
-SortingColumn& SortingColumn::operator=(const SortingColumn& other143) noexcept {
-  column_idx = other143.column_idx;
-  descending = other143.descending;
-  nulls_first = other143.nulls_first;
+SortingColumn& SortingColumn::operator=(const SortingColumn& other147) noexcept {
+  column_idx = other147.column_idx;
+  descending = other147.descending;
+  nulls_first = other147.nulls_first;
   return *this;
 }
-SortingColumn& SortingColumn::operator=(SortingColumn&& other144) noexcept {
-  column_idx = other144.column_idx;
-  descending = other144.descending;
-  nulls_first = other144.nulls_first;
+SortingColumn& SortingColumn::operator=(SortingColumn&& other148) noexcept {
+  column_idx = other148.column_idx;
+  descending = other148.descending;
+  nulls_first = other148.nulls_first;
   return *this;
 }
 void SortingColumn::printTo(std::ostream& out) const {
@@ -5129,9 +5228,9 @@ uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) {
     {
       case 1:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast145;
-          xfer += iprot->readI32(ecast145);
-          this->page_type = static_cast<PageType::type>(ecast145);
+          int32_t ecast149;
+          xfer += iprot->readI32(ecast149);
+          this->page_type = static_cast<PageType::type>(ecast149);
           isset_page_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -5139,9 +5238,9 @@ uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 2:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast146;
-          xfer += iprot->readI32(ecast146);
-          this->encoding = static_cast<Encoding::type>(ecast146);
+          int32_t ecast150;
+          xfer += iprot->readI32(ecast150);
+          this->encoding = static_cast<Encoding::type>(ecast150);
           isset_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -5202,26 +5301,26 @@ void swap(PageEncodingStats &a, PageEncodingStats &b) {
   swap(a.count, b.count);
 }
 
-PageEncodingStats::PageEncodingStats(const PageEncodingStats& other147) noexcept {
-  page_type = other147.page_type;
-  encoding = other147.encoding;
-  count = other147.count;
+PageEncodingStats::PageEncodingStats(const PageEncodingStats& other151) noexcept {
+  page_type = other151.page_type;
+  encoding = other151.encoding;
+  count = other151.count;
 }
-PageEncodingStats::PageEncodingStats(PageEncodingStats&& other148) noexcept {
-  page_type = other148.page_type;
-  encoding = other148.encoding;
-  count = other148.count;
+PageEncodingStats::PageEncodingStats(PageEncodingStats&& other152) noexcept {
+  page_type = other152.page_type;
+  encoding = other152.encoding;
+  count = other152.count;
 }
-PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other149) noexcept {
-  page_type = other149.page_type;
-  encoding = other149.encoding;
-  count = other149.count;
+PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other153) noexcept {
+  page_type = other153.page_type;
+  encoding = other153.encoding;
+  count = other153.count;
   return *this;
 }
-PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other150) noexcept {
-  page_type = other150.page_type;
-  encoding = other150.encoding;
-  count = other150.count;
+PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other154) noexcept {
+  page_type = other154.page_type;
+  encoding = other154.encoding;
+  count = other154.count;
   return *this;
 }
 void PageEncodingStats::printTo(std::ostream& out) const {
@@ -5337,9 +5436,9 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
     {
       case 1:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast151;
-          xfer += iprot->readI32(ecast151);
-          this->type = static_cast<Type::type>(ecast151);
+          int32_t ecast155;
+          xfer += iprot->readI32(ecast155);
+          this->type = static_cast<Type::type>(ecast155);
           isset_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -5349,16 +5448,16 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->encodings.clear();
-            uint32_t _size152;
-            ::apache::thrift::protocol::TType _etype155;
-            xfer += iprot->readListBegin(_etype155, _size152);
-            this->encodings.resize(_size152);
-            uint32_t _i156;
-            for (_i156 = 0; _i156 < _size152; ++_i156)
+            uint32_t _size156;
+            ::apache::thrift::protocol::TType _etype159;
+            xfer += iprot->readListBegin(_etype159, _size156);
+            this->encodings.resize(_size156);
+            uint32_t _i160;
+            for (_i160 = 0; _i160 < _size156; ++_i160)
             {
-              int32_t ecast157;
-              xfer += iprot->readI32(ecast157);
-              this->encodings[_i156] = static_cast<Encoding::type>(ecast157);
+              int32_t ecast161;
+              xfer += iprot->readI32(ecast161);
+              this->encodings[_i160] = static_cast<Encoding::type>(ecast161);
             }
             xfer += iprot->readListEnd();
           }
@@ -5371,14 +5470,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->path_in_schema.clear();
-            uint32_t _size158;
-            ::apache::thrift::protocol::TType _etype161;
-            xfer += iprot->readListBegin(_etype161, _size158);
-            this->path_in_schema.resize(_size158);
-            uint32_t _i162;
-            for (_i162 = 0; _i162 < _size158; ++_i162)
+            uint32_t _size162;
+            ::apache::thrift::protocol::TType _etype165;
+            xfer += iprot->readListBegin(_etype165, _size162);
+            this->path_in_schema.resize(_size162);
+            uint32_t _i166;
+            for (_i166 = 0; _i166 < _size162; ++_i166)
             {
-              xfer += iprot->readString(this->path_in_schema[_i162]);
+              xfer += iprot->readString(this->path_in_schema[_i166]);
             }
             xfer += iprot->readListEnd();
           }
@@ -5389,9 +5488,9 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 4:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast163;
-          xfer += iprot->readI32(ecast163);
-          this->codec = static_cast<CompressionCodec::type>(ecast163);
+          int32_t ecast167;
+          xfer += iprot->readI32(ecast167);
+          this->codec = static_cast<CompressionCodec::type>(ecast167);
           isset_codec = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -5425,14 +5524,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->key_value_metadata.clear();
-            uint32_t _size164;
-            ::apache::thrift::protocol::TType _etype167;
-            xfer += iprot->readListBegin(_etype167, _size164);
-            this->key_value_metadata.resize(_size164);
-            uint32_t _i168;
-            for (_i168 = 0; _i168 < _size164; ++_i168)
+            uint32_t _size168;
+            ::apache::thrift::protocol::TType _etype171;
+            xfer += iprot->readListBegin(_etype171, _size168);
+            this->key_value_metadata.resize(_size168);
+            uint32_t _i172;
+            for (_i172 = 0; _i172 < _size168; ++_i172)
             {
-              xfer += this->key_value_metadata[_i168].read(iprot);
+              xfer += this->key_value_metadata[_i172].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -5477,14 +5576,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->encoding_stats.clear();
-            uint32_t _size169;
-            ::apache::thrift::protocol::TType _etype172;
-            xfer += iprot->readListBegin(_etype172, _size169);
-            this->encoding_stats.resize(_size169);
-            uint32_t _i173;
-            for (_i173 = 0; _i173 < _size169; ++_i173)
+            uint32_t _size173;
+            ::apache::thrift::protocol::TType _etype176;
+            xfer += iprot->readListBegin(_etype176, _size173);
+            this->encoding_stats.resize(_size173);
+            uint32_t _i177;
+            for (_i177 = 0; _i177 < _size173; ++_i177)
             {
-              xfer += this->encoding_stats[_i173].read(iprot);
+              xfer += this->encoding_stats[_i177].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -5541,10 +5640,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con
   xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));
-    std::vector<Encoding::type> ::const_iterator _iter174;
-    for (_iter174 = this->encodings.begin(); _iter174 != this->encodings.end(); ++_iter174)
+    std::vector<Encoding::type> ::const_iterator _iter178;
+    for (_iter178 = this->encodings.begin(); _iter178 != this->encodings.end(); ++_iter178)
     {
-      xfer += oprot->writeI32(static_cast<int32_t>((*_iter174)));
+      xfer += oprot->writeI32(static_cast<int32_t>((*_iter178)));
     }
     xfer += oprot->writeListEnd();
   }
@@ -5553,10 +5652,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con
   xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
-    std::vector<std::string> ::const_iterator _iter175;
-    for (_iter175 = this->path_in_schema.begin(); _iter175 != this->path_in_schema.end(); ++_iter175)
+    std::vector<std::string> ::const_iterator _iter179;
+    for (_iter179 = this->path_in_schema.begin(); _iter179 != this->path_in_schema.end(); ++_iter179)
     {
-      xfer += oprot->writeString((*_iter175));
+      xfer += oprot->writeString((*_iter179));
     }
     xfer += oprot->writeListEnd();
   }
@@ -5582,10 +5681,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con
     xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
-      std::vector<KeyValue> ::const_iterator _iter176;
-      for (_iter176 = this->key_value_metadata.begin(); _iter176 != this->key_value_metadata.end(); ++_iter176)
+      std::vector<KeyValue> ::const_iterator _iter180;
+      for (_iter180 = this->key_value_metadata.begin(); _iter180 != this->key_value_metadata.end(); ++_iter180)
       {
-        xfer += (*_iter176).write(oprot);
+        xfer += (*_iter180).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -5614,10 +5713,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con
     xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->encoding_stats.size()));
-      std::vector<PageEncodingStats> ::const_iterator _iter177;
-      for (_iter177 = this->encoding_stats.begin(); _iter177 != this->encoding_stats.end(); ++_iter177)
+      std::vector<PageEncodingStats> ::const_iterator _iter181;
+      for (_iter181 = this->encoding_stats.begin(); _iter181 != this->encoding_stats.end(); ++_iter181)
       {
-        xfer += (*_iter177).write(oprot);
+        xfer += (*_iter181).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -5652,74 +5751,74 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnMetaData::ColumnMetaData(const ColumnMetaData& other178) {
-  type = other178.type;
-  encodings = other178.encodings;
-  path_in_schema = other178.path_in_schema;
-  codec = other178.codec;
-  num_values = other178.num_values;
-  total_uncompressed_size = other178.total_uncompressed_size;
-  total_compressed_size = other178.total_compressed_size;
-  key_value_metadata = other178.key_value_metadata;
-  data_page_offset = other178.data_page_offset;
-  index_page_offset = other178.index_page_offset;
-  dictionary_page_offset = other178.dictionary_page_offset;
-  statistics = other178.statistics;
-  encoding_stats = other178.encoding_stats;
-  bloom_filter_offset = other178.bloom_filter_offset;
-  __isset = other178.__isset;
-}
-ColumnMetaData::ColumnMetaData(ColumnMetaData&& other179) noexcept {
-  type = other179.type;
-  encodings = std::move(other179.encodings);
-  path_in_schema = std::move(other179.path_in_schema);
-  codec = other179.codec;
-  num_values = other179.num_values;
-  total_uncompressed_size = other179.total_uncompressed_size;
-  total_compressed_size = other179.total_compressed_size;
-  key_value_metadata = std::move(other179.key_value_metadata);
-  data_page_offset = other179.data_page_offset;
-  index_page_offset = other179.index_page_offset;
-  dictionary_page_offset = other179.dictionary_page_offset;
-  statistics = std::move(other179.statistics);
-  encoding_stats = std::move(other179.encoding_stats);
-  bloom_filter_offset = other179.bloom_filter_offset;
-  __isset = other179.__isset;
-}
-ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other180) {
-  type = other180.type;
-  encodings = other180.encodings;
-  path_in_schema = other180.path_in_schema;
-  codec = other180.codec;
-  num_values = other180.num_values;
-  total_uncompressed_size = other180.total_uncompressed_size;
-  total_compressed_size = other180.total_compressed_size;
-  key_value_metadata = other180.key_value_metadata;
-  data_page_offset = other180.data_page_offset;
-  index_page_offset = other180.index_page_offset;
-  dictionary_page_offset = other180.dictionary_page_offset;
-  statistics = other180.statistics;
-  encoding_stats = other180.encoding_stats;
-  bloom_filter_offset = other180.bloom_filter_offset;
-  __isset = other180.__isset;
+ColumnMetaData::ColumnMetaData(const ColumnMetaData& other182) {
+  type = other182.type;
+  encodings = other182.encodings;
+  path_in_schema = other182.path_in_schema;
+  codec = other182.codec;
+  num_values = other182.num_values;
+  total_uncompressed_size = other182.total_uncompressed_size;
+  total_compressed_size = other182.total_compressed_size;
+  key_value_metadata = other182.key_value_metadata;
+  data_page_offset = other182.data_page_offset;
+  index_page_offset = other182.index_page_offset;
+  dictionary_page_offset = other182.dictionary_page_offset;
+  statistics = other182.statistics;
+  encoding_stats = other182.encoding_stats;
+  bloom_filter_offset = other182.bloom_filter_offset;
+  __isset = other182.__isset;
+}
+ColumnMetaData::ColumnMetaData(ColumnMetaData&& other183) noexcept {
+  type = other183.type;
+  encodings = std::move(other183.encodings);
+  path_in_schema = std::move(other183.path_in_schema);
+  codec = other183.codec;
+  num_values = other183.num_values;
+  total_uncompressed_size = other183.total_uncompressed_size;
+  total_compressed_size = other183.total_compressed_size;
+  key_value_metadata = std::move(other183.key_value_metadata);
+  data_page_offset = other183.data_page_offset;
+  index_page_offset = other183.index_page_offset;
+  dictionary_page_offset = other183.dictionary_page_offset;
+  statistics = std::move(other183.statistics);
+  encoding_stats = std::move(other183.encoding_stats);
+  bloom_filter_offset = other183.bloom_filter_offset;
+  __isset = other183.__isset;
+}
+ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other184) {
+  type = other184.type;
+  encodings = other184.encodings;
+  path_in_schema = other184.path_in_schema;
+  codec = other184.codec;
+  num_values = other184.num_values;
+  total_uncompressed_size = other184.total_uncompressed_size;
+  total_compressed_size = other184.total_compressed_size;
+  key_value_metadata = other184.key_value_metadata;
+  data_page_offset = other184.data_page_offset;
+  index_page_offset = other184.index_page_offset;
+  dictionary_page_offset = other184.dictionary_page_offset;
+  statistics = other184.statistics;
+  encoding_stats = other184.encoding_stats;
+  bloom_filter_offset = other184.bloom_filter_offset;
+  __isset = other184.__isset;
   return *this;
 }
-ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other181) noexcept {
-  type = other181.type;
-  encodings = std::move(other181.encodings);
-  path_in_schema = std::move(other181.path_in_schema);
-  codec = other181.codec;
-  num_values = other181.num_values;
-  total_uncompressed_size = other181.total_uncompressed_size;
-  total_compressed_size = other181.total_compressed_size;
-  key_value_metadata = std::move(other181.key_value_metadata);
-  data_page_offset = other181.data_page_offset;
-  index_page_offset = other181.index_page_offset;
-  dictionary_page_offset = other181.dictionary_page_offset;
-  statistics = std::move(other181.statistics);
-  encoding_stats = std::move(other181.encoding_stats);
-  bloom_filter_offset = other181.bloom_filter_offset;
-  __isset = other181.__isset;
+ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other185) noexcept {
+  type = other185.type;
+  encodings = std::move(other185.encodings);
+  path_in_schema = std::move(other185.path_in_schema);
+  codec = other185.codec;
+  num_values = other185.num_values;
+  total_uncompressed_size = other185.total_uncompressed_size;
+  total_compressed_size = other185.total_compressed_size;
+  key_value_metadata = std::move(other185.key_value_metadata);
+  data_page_offset = other185.data_page_offset;
+  index_page_offset = other185.index_page_offset;
+  dictionary_page_offset = other185.dictionary_page_offset;
+  statistics = std::move(other185.statistics);
+  encoding_stats = std::move(other185.encoding_stats);
+  bloom_filter_offset = other185.bloom_filter_offset;
+  __isset = other185.__isset;
   return *this;
 }
 void ColumnMetaData::printTo(std::ostream& out) const {
@@ -5797,18 +5896,18 @@ void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) {
   (void) b;
 }
 
-EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other182) noexcept {
-  (void) other182;
+EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other186) noexcept {
+  (void) other186;
 }
-EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other183) noexcept {
-  (void) other183;
+EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other187) noexcept {
+  (void) other187;
 }
-EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other184) noexcept {
-  (void) other184;
+EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other188) noexcept {
+  (void) other188;
   return *this;
 }
-EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other185) noexcept {
-  (void) other185;
+EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other189) noexcept {
+  (void) other189;
   return *this;
 }
 void EncryptionWithFooterKey::printTo(std::ostream& out) const {
@@ -5863,14 +5962,14 @@ uint32_t EncryptionWithColumnKey::read(::apache::thrift::protocol::TProtocol* ip
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->path_in_schema.clear();
-            uint32_t _size186;
-            ::apache::thrift::protocol::TType _etype189;
-            xfer += iprot->readListBegin(_etype189, _size186);
-            this->path_in_schema.resize(_size186);
-            uint32_t _i190;
-            for (_i190 = 0; _i190 < _size186; ++_i190)
+            uint32_t _size190;
+            ::apache::thrift::protocol::TType _etype193;
+            xfer += iprot->readListBegin(_etype193, _size190);
+            this->path_in_schema.resize(_size190);
+            uint32_t _i194;
+            for (_i194 = 0; _i194 < _size190; ++_i194)
             {
-              xfer += iprot->readString(this->path_in_schema[_i190]);
+              xfer += iprot->readString(this->path_in_schema[_i194]);
             }
             xfer += iprot->readListEnd();
           }
@@ -5909,10 +6008,10 @@ uint32_t EncryptionWithColumnKey::write(::apache::thrift::protocol::TProtocol* o
   xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
-    std::vector<std::string> ::const_iterator _iter191;
-    for (_iter191 = this->path_in_schema.begin(); _iter191 != this->path_in_schema.end(); ++_iter191)
+    std::vector<std::string> ::const_iterator _iter195;
+    for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195)
     {
-      xfer += oprot->writeString((*_iter191));
+      xfer += oprot->writeString((*_iter195));
     }
     xfer += oprot->writeListEnd();
   }
@@ -5935,26 +6034,26 @@ void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) {
   swap(a.__isset, b.__isset);
 }
 
-EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other192) {
-  path_in_schema = other192.path_in_schema;
-  key_metadata = other192.key_metadata;
-  __isset = other192.__isset;
+EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other196) {
+  path_in_schema = other196.path_in_schema;
+  key_metadata = other196.key_metadata;
+  __isset = other196.__isset;
 }
-EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other193) noexcept {
-  path_in_schema = std::move(other193.path_in_schema);
-  key_metadata = std::move(other193.key_metadata);
-  __isset = other193.__isset;
+EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other197) noexcept {
+  path_in_schema = std::move(other197.path_in_schema);
+  key_metadata = std::move(other197.key_metadata);
+  __isset = other197.__isset;
 }
-EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other194) {
-  path_in_schema = other194.path_in_schema;
-  key_metadata = other194.key_metadata;
-  __isset = other194.__isset;
+EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other198) {
+  path_in_schema = other198.path_in_schema;
+  key_metadata = other198.key_metadata;
+  __isset = other198.__isset;
   return *this;
 }
-EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other195) noexcept {
-  path_in_schema = std::move(other195.path_in_schema);
-  key_metadata = std::move(other195.key_metadata);
-  __isset = other195.__isset;
+EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other199) noexcept {
+  path_in_schema = std::move(other199.path_in_schema);
+  key_metadata = std::move(other199.key_metadata);
+  __isset = other199.__isset;
   return *this;
 }
 void EncryptionWithColumnKey::printTo(std::ostream& out) const {
@@ -6062,26 +6161,26 @@ void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other196) {
-  ENCRYPTION_WITH_FOOTER_KEY = other196.ENCRYPTION_WITH_FOOTER_KEY;
-  ENCRYPTION_WITH_COLUMN_KEY = other196.ENCRYPTION_WITH_COLUMN_KEY;
-  __isset = other196.__isset;
+ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other200) {
+  ENCRYPTION_WITH_FOOTER_KEY = other200.ENCRYPTION_WITH_FOOTER_KEY;
+  ENCRYPTION_WITH_COLUMN_KEY = other200.ENCRYPTION_WITH_COLUMN_KEY;
+  __isset = other200.__isset;
 }
-ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other197) noexcept {
-  ENCRYPTION_WITH_FOOTER_KEY = std::move(other197.ENCRYPTION_WITH_FOOTER_KEY);
-  ENCRYPTION_WITH_COLUMN_KEY = std::move(other197.ENCRYPTION_WITH_COLUMN_KEY);
-  __isset = other197.__isset;
+ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other201) noexcept {
+  ENCRYPTION_WITH_FOOTER_KEY = std::move(other201.ENCRYPTION_WITH_FOOTER_KEY);
+  ENCRYPTION_WITH_COLUMN_KEY = std::move(other201.ENCRYPTION_WITH_COLUMN_KEY);
+  __isset = other201.__isset;
 }
-ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other198) {
-  ENCRYPTION_WITH_FOOTER_KEY = other198.ENCRYPTION_WITH_FOOTER_KEY;
-  ENCRYPTION_WITH_COLUMN_KEY = other198.ENCRYPTION_WITH_COLUMN_KEY;
-  __isset = other198.__isset;
+ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other202) {
+  ENCRYPTION_WITH_FOOTER_KEY = other202.ENCRYPTION_WITH_FOOTER_KEY;
+  ENCRYPTION_WITH_COLUMN_KEY = other202.ENCRYPTION_WITH_COLUMN_KEY;
+  __isset = other202.__isset;
   return *this;
 }
-ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other199) noexcept {
-  ENCRYPTION_WITH_FOOTER_KEY = std::move(other199.ENCRYPTION_WITH_FOOTER_KEY);
-  ENCRYPTION_WITH_COLUMN_KEY = std::move(other199.ENCRYPTION_WITH_COLUMN_KEY);
-  __isset = other199.__isset;
+ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other203) noexcept {
+  ENCRYPTION_WITH_FOOTER_KEY = std::move(other203.ENCRYPTION_WITH_FOOTER_KEY);
+  ENCRYPTION_WITH_COLUMN_KEY = std::move(other203.ENCRYPTION_WITH_COLUMN_KEY);
+  __isset = other203.__isset;
   return *this;
 }
 void ColumnCryptoMetaData::printTo(std::ostream& out) const {
@@ -6323,54 +6422,54 @@ void swap(ColumnChunk &a, ColumnChunk &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnChunk::ColumnChunk(const ColumnChunk& other200) {
-  file_path = other200.file_path;
-  file_offset = other200.file_offset;
-  meta_data = other200.meta_data;
-  offset_index_offset = other200.offset_index_offset;
-  offset_index_length = other200.offset_index_length;
-  column_index_offset = other200.column_index_offset;
-  column_index_length = other200.column_index_length;
-  crypto_metadata = other200.crypto_metadata;
-  encrypted_column_metadata = other200.encrypted_column_metadata;
-  __isset = other200.__isset;
-}
-ColumnChunk::ColumnChunk(ColumnChunk&& other201) noexcept {
-  file_path = std::move(other201.file_path);
-  file_offset = other201.file_offset;
-  meta_data = std::move(other201.meta_data);
-  offset_index_offset = other201.offset_index_offset;
-  offset_index_length = other201.offset_index_length;
-  column_index_offset = other201.column_index_offset;
-  column_index_length = other201.column_index_length;
-  crypto_metadata = std::move(other201.crypto_metadata);
-  encrypted_column_metadata = std::move(other201.encrypted_column_metadata);
-  __isset = other201.__isset;
-}
-ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other202) {
-  file_path = other202.file_path;
-  file_offset = other202.file_offset;
-  meta_data = other202.meta_data;
-  offset_index_offset = other202.offset_index_offset;
-  offset_index_length = other202.offset_index_length;
-  column_index_offset = other202.column_index_offset;
-  column_index_length = other202.column_index_length;
-  crypto_metadata = other202.crypto_metadata;
-  encrypted_column_metadata = other202.encrypted_column_metadata;
-  __isset = other202.__isset;
+ColumnChunk::ColumnChunk(const ColumnChunk& other204) {
+  file_path = other204.file_path;
+  file_offset = other204.file_offset;
+  meta_data = other204.meta_data;
+  offset_index_offset = other204.offset_index_offset;
+  offset_index_length = other204.offset_index_length;
+  column_index_offset = other204.column_index_offset;
+  column_index_length = other204.column_index_length;
+  crypto_metadata = other204.crypto_metadata;
+  encrypted_column_metadata = other204.encrypted_column_metadata;
+  __isset = other204.__isset;
+}
+ColumnChunk::ColumnChunk(ColumnChunk&& other205) noexcept {
+  file_path = std::move(other205.file_path);
+  file_offset = other205.file_offset;
+  meta_data = std::move(other205.meta_data);
+  offset_index_offset = other205.offset_index_offset;
+  offset_index_length = other205.offset_index_length;
+  column_index_offset = other205.column_index_offset;
+  column_index_length = other205.column_index_length;
+  crypto_metadata = std::move(other205.crypto_metadata);
+  encrypted_column_metadata = std::move(other205.encrypted_column_metadata);
+  __isset = other205.__isset;
+}
+ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other206) {
+  file_path = other206.file_path;
+  file_offset = other206.file_offset;
+  meta_data = other206.meta_data;
+  offset_index_offset = other206.offset_index_offset;
+  offset_index_length = other206.offset_index_length;
+  column_index_offset = other206.column_index_offset;
+  column_index_length = other206.column_index_length;
+  crypto_metadata = other206.crypto_metadata;
+  encrypted_column_metadata = other206.encrypted_column_metadata;
+  __isset = other206.__isset;
   return *this;
 }
-ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other203) noexcept {
-  file_path = std::move(other203.file_path);
-  file_offset = other203.file_offset;
-  meta_data = std::move(other203.meta_data);
-  offset_index_offset = other203.offset_index_offset;
-  offset_index_length = other203.offset_index_length;
-  column_index_offset = other203.column_index_offset;
-  column_index_length = other203.column_index_length;
-  crypto_metadata = std::move(other203.crypto_metadata);
-  encrypted_column_metadata = std::move(other203.encrypted_column_metadata);
-  __isset = other203.__isset;
+ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other207) noexcept {
+  file_path = std::move(other207.file_path);
+  file_offset = other207.file_offset;
+  meta_data = std::move(other207.meta_data);
+  offset_index_offset = other207.offset_index_offset;
+  offset_index_length = other207.offset_index_length;
+  column_index_offset = other207.column_index_offset;
+  column_index_length = other207.column_index_length;
+  crypto_metadata = std::move(other207.crypto_metadata);
+  encrypted_column_metadata = std::move(other207.encrypted_column_metadata);
+  __isset = other207.__isset;
   return *this;
 }
 void ColumnChunk::printTo(std::ostream& out) const {
@@ -6459,14 +6558,14 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->columns.clear();
-            uint32_t _size204;
-            ::apache::thrift::protocol::TType _etype207;
-            xfer += iprot->readListBegin(_etype207, _size204);
-            this->columns.resize(_size204);
-            uint32_t _i208;
-            for (_i208 = 0; _i208 < _size204; ++_i208)
+            uint32_t _size208;
+            ::apache::thrift::protocol::TType _etype211;
+            xfer += iprot->readListBegin(_etype211, _size208);
+            this->columns.resize(_size208);
+            uint32_t _i212;
+            for (_i212 = 0; _i212 < _size208; ++_i212)
             {
-              xfer += this->columns[_i208].read(iprot);
+              xfer += this->columns[_i212].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -6495,14 +6594,14 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->sorting_columns.clear();
-            uint32_t _size209;
-            ::apache::thrift::protocol::TType _etype212;
-            xfer += iprot->readListBegin(_etype212, _size209);
-            this->sorting_columns.resize(_size209);
-            uint32_t _i213;
-            for (_i213 = 0; _i213 < _size209; ++_i213)
+            uint32_t _size213;
+            ::apache::thrift::protocol::TType _etype216;
+            xfer += iprot->readListBegin(_etype216, _size213);
+            this->sorting_columns.resize(_size213);
+            uint32_t _i217;
+            for (_i217 = 0; _i217 < _size213; ++_i217)
             {
-              xfer += this->sorting_columns[_i213].read(iprot);
+              xfer += this->sorting_columns[_i217].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -6561,10 +6660,10 @@ uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
   xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size()));
-    std::vector<ColumnChunk> ::const_iterator _iter214;
-    for (_iter214 = this->columns.begin(); _iter214 != this->columns.end(); ++_iter214)
+    std::vector<ColumnChunk> ::const_iterator _iter218;
+    for (_iter218 = this->columns.begin(); _iter218 != this->columns.end(); ++_iter218)
     {
-      xfer += (*_iter214).write(oprot);
+      xfer += (*_iter218).write(oprot);
     }
     xfer += oprot->writeListEnd();
   }
@@ -6582,10 +6681,10 @@ uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
     xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
-      std::vector<SortingColumn> ::const_iterator _iter215;
-      for (_iter215 = this->sorting_columns.begin(); _iter215 != this->sorting_columns.end(); ++_iter215)
+      std::vector<SortingColumn> ::const_iterator _iter219;
+      for (_iter219 = this->sorting_columns.begin(); _iter219 != this->sorting_columns.end(); ++_iter219)
       {
-        xfer += (*_iter215).write(oprot);
+        xfer += (*_iter219).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -6623,46 +6722,46 @@ void swap(RowGroup &a, RowGroup &b) {
   swap(a.__isset, b.__isset);
 }
 
-RowGroup::RowGroup(const RowGroup& other216) {
-  columns = other216.columns;
-  total_byte_size = other216.total_byte_size;
-  num_rows = other216.num_rows;
-  sorting_columns = other216.sorting_columns;
-  file_offset = other216.file_offset;
-  total_compressed_size = other216.total_compressed_size;
-  ordinal = other216.ordinal;
-  __isset = other216.__isset;
-}
-RowGroup::RowGroup(RowGroup&& other217) noexcept {
-  columns = std::move(other217.columns);
-  total_byte_size = other217.total_byte_size;
-  num_rows = other217.num_rows;
-  sorting_columns = std::move(other217.sorting_columns);
-  file_offset = other217.file_offset;
-  total_compressed_size = other217.total_compressed_size;
-  ordinal = other217.ordinal;
-  __isset = other217.__isset;
-}
-RowGroup& RowGroup::operator=(const RowGroup& other218) {
-  columns = other218.columns;
-  total_byte_size = other218.total_byte_size;
-  num_rows = other218.num_rows;
-  sorting_columns = other218.sorting_columns;
-  file_offset = other218.file_offset;
-  total_compressed_size = other218.total_compressed_size;
-  ordinal = other218.ordinal;
-  __isset = other218.__isset;
+RowGroup::RowGroup(const RowGroup& other220) {
+  columns = other220.columns;
+  total_byte_size = other220.total_byte_size;
+  num_rows = other220.num_rows;
+  sorting_columns = other220.sorting_columns;
+  file_offset = other220.file_offset;
+  total_compressed_size = other220.total_compressed_size;
+  ordinal = other220.ordinal;
+  __isset = other220.__isset;
+}
+RowGroup::RowGroup(RowGroup&& other221) noexcept {
+  columns = std::move(other221.columns);
+  total_byte_size = other221.total_byte_size;
+  num_rows = other221.num_rows;
+  sorting_columns = std::move(other221.sorting_columns);
+  file_offset = other221.file_offset;
+  total_compressed_size = other221.total_compressed_size;
+  ordinal = other221.ordinal;
+  __isset = other221.__isset;
+}
+RowGroup& RowGroup::operator=(const RowGroup& other222) {
+  columns = other222.columns;
+  total_byte_size = other222.total_byte_size;
+  num_rows = other222.num_rows;
+  sorting_columns = other222.sorting_columns;
+  file_offset = other222.file_offset;
+  total_compressed_size = other222.total_compressed_size;
+  ordinal = other222.ordinal;
+  __isset = other222.__isset;
   return *this;
 }
-RowGroup& RowGroup::operator=(RowGroup&& other219) noexcept {
-  columns = std::move(other219.columns);
-  total_byte_size = other219.total_byte_size;
-  num_rows = other219.num_rows;
-  sorting_columns = std::move(other219.sorting_columns);
-  file_offset = other219.file_offset;
-  total_compressed_size = other219.total_compressed_size;
-  ordinal = other219.ordinal;
-  __isset = other219.__isset;
+RowGroup& RowGroup::operator=(RowGroup&& other223) noexcept {
+  columns = std::move(other223.columns);
+  total_byte_size = other223.total_byte_size;
+  num_rows = other223.num_rows;
+  sorting_columns = std::move(other223.sorting_columns);
+  file_offset = other223.file_offset;
+  total_compressed_size = other223.total_compressed_size;
+  ordinal = other223.ordinal;
+  __isset = other223.__isset;
   return *this;
 }
 void RowGroup::printTo(std::ostream& out) const {
@@ -6733,18 +6832,18 @@ void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) {
   (void) b;
 }
 
-TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other220) noexcept {
-  (void) other220;
+TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other224) noexcept {
+  (void) other224;
 }
-TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other221) noexcept {
-  (void) other221;
+TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other225) noexcept {
+  (void) other225;
 }
-TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other222) noexcept {
-  (void) other222;
+TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other226) noexcept {
+  (void) other226;
   return *this;
 }
-TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other223) noexcept {
-  (void) other223;
+TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other227) noexcept {
+  (void) other227;
   return *this;
 }
 void TypeDefinedOrder::printTo(std::ostream& out) const {
@@ -6831,22 +6930,22 @@ void swap(ColumnOrder &a, ColumnOrder &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnOrder::ColumnOrder(const ColumnOrder& other224) noexcept {
-  TYPE_ORDER = other224.TYPE_ORDER;
-  __isset = other224.__isset;
+ColumnOrder::ColumnOrder(const ColumnOrder& other228) noexcept {
+  TYPE_ORDER = other228.TYPE_ORDER;
+  __isset = other228.__isset;
 }
-ColumnOrder::ColumnOrder(ColumnOrder&& other225) noexcept {
-  TYPE_ORDER = std::move(other225.TYPE_ORDER);
-  __isset = other225.__isset;
+ColumnOrder::ColumnOrder(ColumnOrder&& other229) noexcept {
+  TYPE_ORDER = std::move(other229.TYPE_ORDER);
+  __isset = other229.__isset;
 }
-ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other226) noexcept {
-  TYPE_ORDER = other226.TYPE_ORDER;
-  __isset = other226.__isset;
+ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other230) noexcept {
+  TYPE_ORDER = other230.TYPE_ORDER;
+  __isset = other230.__isset;
   return *this;
 }
-ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other227) noexcept {
-  TYPE_ORDER = std::move(other227.TYPE_ORDER);
-  __isset = other227.__isset;
+ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other231) noexcept {
+  TYPE_ORDER = std::move(other231.TYPE_ORDER);
+  __isset = other231.__isset;
   return *this;
 }
 void ColumnOrder::printTo(std::ostream& out) const {
@@ -6974,26 +7073,26 @@ void swap(PageLocation &a, PageLocation &b) {
   swap(a.first_row_index, b.first_row_index);
 }
 
-PageLocation::PageLocation(const PageLocation& other228) noexcept {
-  offset = other228.offset;
-  compressed_page_size = other228.compressed_page_size;
-  first_row_index = other228.first_row_index;
+PageLocation::PageLocation(const PageLocation& other232) noexcept {
+  offset = other232.offset;
+  compressed_page_size = other232.compressed_page_size;
+  first_row_index = other232.first_row_index;
 }
-PageLocation::PageLocation(PageLocation&& other229) noexcept {
-  offset = other229.offset;
-  compressed_page_size = other229.compressed_page_size;
-  first_row_index = other229.first_row_index;
+PageLocation::PageLocation(PageLocation&& other233) noexcept {
+  offset = other233.offset;
+  compressed_page_size = other233.compressed_page_size;
+  first_row_index = other233.first_row_index;
 }
-PageLocation& PageLocation::operator=(const PageLocation& other230) noexcept {
-  offset = other230.offset;
-  compressed_page_size = other230.compressed_page_size;
-  first_row_index = other230.first_row_index;
+PageLocation& PageLocation::operator=(const PageLocation& other234) noexcept {
+  offset = other234.offset;
+  compressed_page_size = other234.compressed_page_size;
+  first_row_index = other234.first_row_index;
   return *this;
 }
-PageLocation& PageLocation::operator=(PageLocation&& other231) noexcept {
-  offset = other231.offset;
-  compressed_page_size = other231.compressed_page_size;
-  first_row_index = other231.first_row_index;
+PageLocation& PageLocation::operator=(PageLocation&& other235) noexcept {
+  offset = other235.offset;
+  compressed_page_size = other235.compressed_page_size;
+  first_row_index = other235.first_row_index;
   return *this;
 }
 void PageLocation::printTo(std::ostream& out) const {
@@ -7046,14 +7145,14 @@ uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->page_locations.clear();
-            uint32_t _size232;
-            ::apache::thrift::protocol::TType _etype235;
-            xfer += iprot->readListBegin(_etype235, _size232);
-            this->page_locations.resize(_size232);
-            uint32_t _i236;
-            for (_i236 = 0; _i236 < _size232; ++_i236)
+            uint32_t _size236;
+            ::apache::thrift::protocol::TType _etype239;
+            xfer += iprot->readListBegin(_etype239, _size236);
+            this->page_locations.resize(_size236);
+            uint32_t _i240;
+            for (_i240 = 0; _i240 < _size236; ++_i240)
             {
-              xfer += this->page_locations[_i236].read(iprot);
+              xfer += this->page_locations[_i240].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -7084,10 +7183,10 @@ uint32_t OffsetIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->page_locations.size()));
-    std::vector<PageLocation> ::const_iterator _iter237;
-    for (_iter237 = this->page_locations.begin(); _iter237 != this->page_locations.end(); ++_iter237)
+    std::vector<PageLocation> ::const_iterator _iter241;
+    for (_iter241 = this->page_locations.begin(); _iter241 != this->page_locations.end(); ++_iter241)
     {
-      xfer += (*_iter237).write(oprot);
+      xfer += (*_iter241).write(oprot);
     }
     xfer += oprot->writeListEnd();
   }
@@ -7103,18 +7202,18 @@ void swap(OffsetIndex &a, OffsetIndex &b) {
   swap(a.page_locations, b.page_locations);
 }
 
-OffsetIndex::OffsetIndex(const OffsetIndex& other238) {
-  page_locations = other238.page_locations;
+OffsetIndex::OffsetIndex(const OffsetIndex& other242) {
+  page_locations = other242.page_locations;
 }
-OffsetIndex::OffsetIndex(OffsetIndex&& other239) noexcept {
-  page_locations = std::move(other239.page_locations);
+OffsetIndex::OffsetIndex(OffsetIndex&& other243) noexcept {
+  page_locations = std::move(other243.page_locations);
 }
-OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other240) {
-  page_locations = other240.page_locations;
+OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other244) {
+  page_locations = other244.page_locations;
   return *this;
 }
-OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other241) noexcept {
-  page_locations = std::move(other241.page_locations);
+OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other245) noexcept {
+  page_locations = std::move(other245.page_locations);
   return *this;
 }
 void OffsetIndex::printTo(std::ostream& out) const {
@@ -7185,14 +7284,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->null_pages.clear();
-            uint32_t _size242;
-            ::apache::thrift::protocol::TType _etype245;
-            xfer += iprot->readListBegin(_etype245, _size242);
-            this->null_pages.resize(_size242);
-            uint32_t _i246;
-            for (_i246 = 0; _i246 < _size242; ++_i246)
+            uint32_t _size246;
+            ::apache::thrift::protocol::TType _etype249;
+            xfer += iprot->readListBegin(_etype249, _size246);
+            this->null_pages.resize(_size246);
+            uint32_t _i250;
+            for (_i250 = 0; _i250 < _size246; ++_i250)
             {
-              xfer += iprot->readBool(this->null_pages[_i246]);
+              xfer += iprot->readBool(this->null_pages[_i250]);
             }
             xfer += iprot->readListEnd();
           }
@@ -7205,14 +7304,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->min_values.clear();
-            uint32_t _size247;
-            ::apache::thrift::protocol::TType _etype250;
-            xfer += iprot->readListBegin(_etype250, _size247);
-            this->min_values.resize(_size247);
-            uint32_t _i251;
-            for (_i251 = 0; _i251 < _size247; ++_i251)
+            uint32_t _size251;
+            ::apache::thrift::protocol::TType _etype254;
+            xfer += iprot->readListBegin(_etype254, _size251);
+            this->min_values.resize(_size251);
+            uint32_t _i255;
+            for (_i255 = 0; _i255 < _size251; ++_i255)
             {
-              xfer += iprot->readBinary(this->min_values[_i251]);
+              xfer += iprot->readBinary(this->min_values[_i255]);
             }
             xfer += iprot->readListEnd();
           }
@@ -7225,14 +7324,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->max_values.clear();
-            uint32_t _size252;
-            ::apache::thrift::protocol::TType _etype255;
-            xfer += iprot->readListBegin(_etype255, _size252);
-            this->max_values.resize(_size252);
-            uint32_t _i256;
-            for (_i256 = 0; _i256 < _size252; ++_i256)
+            uint32_t _size256;
+            ::apache::thrift::protocol::TType _etype259;
+            xfer += iprot->readListBegin(_etype259, _size256);
+            this->max_values.resize(_size256);
+            uint32_t _i260;
+            for (_i260 = 0; _i260 < _size256; ++_i260)
             {
-              xfer += iprot->readBinary(this->max_values[_i256]);
+              xfer += iprot->readBinary(this->max_values[_i260]);
             }
             xfer += iprot->readListEnd();
           }
@@ -7243,9 +7342,9 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 4:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast257;
-          xfer += iprot->readI32(ecast257);
-          this->boundary_order = static_cast<BoundaryOrder::type>(ecast257);
+          int32_t ecast261;
+          xfer += iprot->readI32(ecast261);
+          this->boundary_order = static_cast<BoundaryOrder::type>(ecast261);
           isset_boundary_order = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -7255,14 +7354,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->null_counts.clear();
-            uint32_t _size258;
-            ::apache::thrift::protocol::TType _etype261;
-            xfer += iprot->readListBegin(_etype261, _size258);
-            this->null_counts.resize(_size258);
-            uint32_t _i262;
-            for (_i262 = 0; _i262 < _size258; ++_i262)
+            uint32_t _size262;
+            ::apache::thrift::protocol::TType _etype265;
+            xfer += iprot->readListBegin(_etype265, _size262);
+            this->null_counts.resize(_size262);
+            uint32_t _i266;
+            for (_i266 = 0; _i266 < _size262; ++_i266)
             {
-              xfer += iprot->readI64(this->null_counts[_i262]);
+              xfer += iprot->readI64(this->null_counts[_i266]);
             }
             xfer += iprot->readListEnd();
           }
@@ -7299,10 +7398,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast<uint32_t>(this->null_pages.size()));
-    std::vector<bool> ::const_iterator _iter263;
-    for (_iter263 = this->null_pages.begin(); _iter263 != this->null_pages.end(); ++_iter263)
+    std::vector<bool> ::const_iterator _iter267;
+    for (_iter267 = this->null_pages.begin(); _iter267 != this->null_pages.end(); ++_iter267)
     {
-      xfer += oprot->writeBool((*_iter263));
+      xfer += oprot->writeBool((*_iter267));
     }
     xfer += oprot->writeListEnd();
   }
@@ -7311,10 +7410,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->min_values.size()));
-    std::vector<std::string> ::const_iterator _iter264;
-    for (_iter264 = this->min_values.begin(); _iter264 != this->min_values.end(); ++_iter264)
+    std::vector<std::string> ::const_iterator _iter268;
+    for (_iter268 = this->min_values.begin(); _iter268 != this->min_values.end(); ++_iter268)
     {
-      xfer += oprot->writeBinary((*_iter264));
+      xfer += oprot->writeBinary((*_iter268));
     }
     xfer += oprot->writeListEnd();
   }
@@ -7323,10 +7422,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->max_values.size()));
-    std::vector<std::string> ::const_iterator _iter265;
-    for (_iter265 = this->max_values.begin(); _iter265 != this->max_values.end(); ++_iter265)
+    std::vector<std::string> ::const_iterator _iter269;
+    for (_iter269 = this->max_values.begin(); _iter269 != this->max_values.end(); ++_iter269)
     {
-      xfer += oprot->writeBinary((*_iter265));
+      xfer += oprot->writeBinary((*_iter269));
     }
     xfer += oprot->writeListEnd();
   }
@@ -7340,10 +7439,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
     xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->null_counts.size()));
-      std::vector<int64_t> ::const_iterator _iter266;
-      for (_iter266 = this->null_counts.begin(); _iter266 != this->null_counts.end(); ++_iter266)
+      std::vector<int64_t> ::const_iterator _iter270;
+      for (_iter270 = this->null_counts.begin(); _iter270 != this->null_counts.end(); ++_iter270)
       {
-        xfer += oprot->writeI64((*_iter266));
+        xfer += oprot->writeI64((*_iter270));
       }
       xfer += oprot->writeListEnd();
     }
@@ -7364,38 +7463,38 @@ void swap(ColumnIndex &a, ColumnIndex &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnIndex::ColumnIndex(const ColumnIndex& other267) {
-  null_pages = other267.null_pages;
-  min_values = other267.min_values;
-  max_values = other267.max_values;
-  boundary_order = other267.boundary_order;
-  null_counts = other267.null_counts;
-  __isset = other267.__isset;
-}
-ColumnIndex::ColumnIndex(ColumnIndex&& other268) noexcept {
-  null_pages = std::move(other268.null_pages);
-  min_values = std::move(other268.min_values);
-  max_values = std::move(other268.max_values);
-  boundary_order = other268.boundary_order;
-  null_counts = std::move(other268.null_counts);
-  __isset = other268.__isset;
-}
-ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other269) {
-  null_pages = other269.null_pages;
-  min_values = other269.min_values;
-  max_values = other269.max_values;
-  boundary_order = other269.boundary_order;
-  null_counts = other269.null_counts;
-  __isset = other269.__isset;
+ColumnIndex::ColumnIndex(const ColumnIndex& other271) {
+  null_pages = other271.null_pages;
+  min_values = other271.min_values;
+  max_values = other271.max_values;
+  boundary_order = other271.boundary_order;
+  null_counts = other271.null_counts;
+  __isset = other271.__isset;
+}
+ColumnIndex::ColumnIndex(ColumnIndex&& other272) noexcept {
+  null_pages = std::move(other272.null_pages);
+  min_values = std::move(other272.min_values);
+  max_values = std::move(other272.max_values);
+  boundary_order = other272.boundary_order;
+  null_counts = std::move(other272.null_counts);
+  __isset = other272.__isset;
+}
+ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other273) {
+  null_pages = other273.null_pages;
+  min_values = other273.min_values;
+  max_values = other273.max_values;
+  boundary_order = other273.boundary_order;
+  null_counts = other273.null_counts;
+  __isset = other273.__isset;
   return *this;
 }
-ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other270) noexcept {
-  null_pages = std::move(other270.null_pages);
-  min_values = std::move(other270.min_values);
-  max_values = std::move(other270.max_values);
-  boundary_order = other270.boundary_order;
-  null_counts = std::move(other270.null_counts);
-  __isset = other270.__isset;
+ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other274) noexcept {
+  null_pages = std::move(other274.null_pages);
+  min_values = std::move(other274.min_values);
+  max_values = std::move(other274.max_values);
+  boundary_order = other274.boundary_order;
+  null_counts = std::move(other274.null_counts);
+  __isset = other274.__isset;
   return *this;
 }
 void ColumnIndex::printTo(std::ostream& out) const {
@@ -7525,30 +7624,30 @@ void swap(AesGcmV1 &a, AesGcmV1 &b) {
   swap(a.__isset, b.__isset);
 }
 
-AesGcmV1::AesGcmV1(const AesGcmV1& other271) {
-  aad_prefix = other271.aad_prefix;
-  aad_file_unique = other271.aad_file_unique;
-  supply_aad_prefix = other271.supply_aad_prefix;
-  __isset = other271.__isset;
+AesGcmV1::AesGcmV1(const AesGcmV1& other275) {
+  aad_prefix = other275.aad_prefix;
+  aad_file_unique = other275.aad_file_unique;
+  supply_aad_prefix = other275.supply_aad_prefix;
+  __isset = other275.__isset;
 }
-AesGcmV1::AesGcmV1(AesGcmV1&& other272) noexcept {
-  aad_prefix = std::move(other272.aad_prefix);
-  aad_file_unique = std::move(other272.aad_file_unique);
-  supply_aad_prefix = other272.supply_aad_prefix;
-  __isset = other272.__isset;
+AesGcmV1::AesGcmV1(AesGcmV1&& other276) noexcept {
+  aad_prefix = std::move(other276.aad_prefix);
+  aad_file_unique = std::move(other276.aad_file_unique);
+  supply_aad_prefix = other276.supply_aad_prefix;
+  __isset = other276.__isset;
 }
-AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other273) {
-  aad_prefix = other273.aad_prefix;
-  aad_file_unique = other273.aad_file_unique;
-  supply_aad_prefix = other273.supply_aad_prefix;
-  __isset = other273.__isset;
+AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other277) {
+  aad_prefix = other277.aad_prefix;
+  aad_file_unique = other277.aad_file_unique;
+  supply_aad_prefix = other277.supply_aad_prefix;
+  __isset = other277.__isset;
   return *this;
 }
-AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other274) noexcept {
-  aad_prefix = std::move(other274.aad_prefix);
-  aad_file_unique = std::move(other274.aad_file_unique);
-  supply_aad_prefix = other274.supply_aad_prefix;
-  __isset = other274.__isset;
+AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other278) noexcept {
+  aad_prefix = std::move(other278.aad_prefix);
+  aad_file_unique = std::move(other278.aad_file_unique);
+  supply_aad_prefix = other278.supply_aad_prefix;
+  __isset = other278.__isset;
   return *this;
 }
 void AesGcmV1::printTo(std::ostream& out) const {
@@ -7676,30 +7775,30 @@ void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) {
   swap(a.__isset, b.__isset);
 }
 
-AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other275) {
-  aad_prefix = other275.aad_prefix;
-  aad_file_unique = other275.aad_file_unique;
-  supply_aad_prefix = other275.supply_aad_prefix;
-  __isset = other275.__isset;
+AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other279) {
+  aad_prefix = other279.aad_prefix;
+  aad_file_unique = other279.aad_file_unique;
+  supply_aad_prefix = other279.supply_aad_prefix;
+  __isset = other279.__isset;
 }
-AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other276) noexcept {
-  aad_prefix = std::move(other276.aad_prefix);
-  aad_file_unique = std::move(other276.aad_file_unique);
-  supply_aad_prefix = other276.supply_aad_prefix;
-  __isset = other276.__isset;
+AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other280) noexcept {
+  aad_prefix = std::move(other280.aad_prefix);
+  aad_file_unique = std::move(other280.aad_file_unique);
+  supply_aad_prefix = other280.supply_aad_prefix;
+  __isset = other280.__isset;
 }
-AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other277) {
-  aad_prefix = other277.aad_prefix;
-  aad_file_unique = other277.aad_file_unique;
-  supply_aad_prefix = other277.supply_aad_prefix;
-  __isset = other277.__isset;
+AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other281) {
+  aad_prefix = other281.aad_prefix;
+  aad_file_unique = other281.aad_file_unique;
+  supply_aad_prefix = other281.supply_aad_prefix;
+  __isset = other281.__isset;
   return *this;
 }
-AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other278) noexcept {
-  aad_prefix = std::move(other278.aad_prefix);
-  aad_file_unique = std::move(other278.aad_file_unique);
-  supply_aad_prefix = other278.supply_aad_prefix;
-  __isset = other278.__isset;
+AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other282) noexcept {
+  aad_prefix = std::move(other282.aad_prefix);
+  aad_file_unique = std::move(other282.aad_file_unique);
+  supply_aad_prefix = other282.supply_aad_prefix;
+  __isset = other282.__isset;
   return *this;
 }
 void AesGcmCtrV1::printTo(std::ostream& out) const {
@@ -7808,26 +7907,26 @@ void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) {
   swap(a.__isset, b.__isset);
 }
 
-EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other279) {
-  AES_GCM_V1 = other279.AES_GCM_V1;
-  AES_GCM_CTR_V1 = other279.AES_GCM_CTR_V1;
-  __isset = other279.__isset;
+EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other283) {
+  AES_GCM_V1 = other283.AES_GCM_V1;
+  AES_GCM_CTR_V1 = other283.AES_GCM_CTR_V1;
+  __isset = other283.__isset;
 }
-EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other280) noexcept {
-  AES_GCM_V1 = std::move(other280.AES_GCM_V1);
-  AES_GCM_CTR_V1 = std::move(other280.AES_GCM_CTR_V1);
-  __isset = other280.__isset;
+EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other284) noexcept {
+  AES_GCM_V1 = std::move(other284.AES_GCM_V1);
+  AES_GCM_CTR_V1 = std::move(other284.AES_GCM_CTR_V1);
+  __isset = other284.__isset;
 }
-EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other281) {
-  AES_GCM_V1 = other281.AES_GCM_V1;
-  AES_GCM_CTR_V1 = other281.AES_GCM_CTR_V1;
-  __isset = other281.__isset;
+EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other285) {
+  AES_GCM_V1 = other285.AES_GCM_V1;
+  AES_GCM_CTR_V1 = other285.AES_GCM_CTR_V1;
+  __isset = other285.__isset;
   return *this;
 }
-EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other282) noexcept {
-  AES_GCM_V1 = std::move(other282.AES_GCM_V1);
-  AES_GCM_CTR_V1 = std::move(other282.AES_GCM_CTR_V1);
-  __isset = other282.__isset;
+EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other286) noexcept {
+  AES_GCM_V1 = std::move(other286.AES_GCM_V1);
+  AES_GCM_CTR_V1 = std::move(other286.AES_GCM_CTR_V1);
+  __isset = other286.__isset;
   return *this;
 }
 void EncryptionAlgorithm::printTo(std::ostream& out) const {
@@ -7927,14 +8026,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->schema.clear();
-            uint32_t _size283;
-            ::apache::thrift::protocol::TType _etype286;
-            xfer += iprot->readListBegin(_etype286, _size283);
-            this->schema.resize(_size283);
-            uint32_t _i287;
-            for (_i287 = 0; _i287 < _size283; ++_i287)
+            uint32_t _size287;
+            ::apache::thrift::protocol::TType _etype290;
+            xfer += iprot->readListBegin(_etype290, _size287);
+            this->schema.resize(_size287);
+            uint32_t _i291;
+            for (_i291 = 0; _i291 < _size287; ++_i291)
             {
-              xfer += this->schema[_i287].read(iprot);
+              xfer += this->schema[_i291].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -7955,14 +8054,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->row_groups.clear();
-            uint32_t _size288;
-            ::apache::thrift::protocol::TType _etype291;
-            xfer += iprot->readListBegin(_etype291, _size288);
-            this->row_groups.resize(_size288);
-            uint32_t _i292;
-            for (_i292 = 0; _i292 < _size288; ++_i292)
+            uint32_t _size292;
+            ::apache::thrift::protocol::TType _etype295;
+            xfer += iprot->readListBegin(_etype295, _size292);
+            this->row_groups.resize(_size292);
+            uint32_t _i296;
+            for (_i296 = 0; _i296 < _size292; ++_i296)
             {
-              xfer += this->row_groups[_i292].read(iprot);
+              xfer += this->row_groups[_i296].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -7975,14 +8074,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->key_value_metadata.clear();
-            uint32_t _size293;
-            ::apache::thrift::protocol::TType _etype296;
-            xfer += iprot->readListBegin(_etype296, _size293);
-            this->key_value_metadata.resize(_size293);
-            uint32_t _i297;
-            for (_i297 = 0; _i297 < _size293; ++_i297)
+            uint32_t _size297;
+            ::apache::thrift::protocol::TType _etype300;
+            xfer += iprot->readListBegin(_etype300, _size297);
+            this->key_value_metadata.resize(_size297);
+            uint32_t _i301;
+            for (_i301 = 0; _i301 < _size297; ++_i301)
             {
-              xfer += this->key_value_metadata[_i297].read(iprot);
+              xfer += this->key_value_metadata[_i301].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -8003,14 +8102,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->column_orders.clear();
-            uint32_t _size298;
-            ::apache::thrift::protocol::TType _etype301;
-            xfer += iprot->readListBegin(_etype301, _size298);
-            this->column_orders.resize(_size298);
-            uint32_t _i302;
-            for (_i302 = 0; _i302 < _size298; ++_i302)
+            uint32_t _size302;
+            ::apache::thrift::protocol::TType _etype305;
+            xfer += iprot->readListBegin(_etype305, _size302);
+            this->column_orders.resize(_size302);
+            uint32_t _i306;
+            for (_i306 = 0; _i306 < _size302; ++_i306)
             {
-              xfer += this->column_orders[_i302].read(iprot);
+              xfer += this->column_orders[_i306].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -8067,10 +8166,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->schema.size()));
-    std::vector<SchemaElement> ::const_iterator _iter303;
-    for (_iter303 = this->schema.begin(); _iter303 != this->schema.end(); ++_iter303)
+    std::vector<SchemaElement> ::const_iterator _iter307;
+    for (_iter307 = this->schema.begin(); _iter307 != this->schema.end(); ++_iter307)
     {
-      xfer += (*_iter303).write(oprot);
+      xfer += (*_iter307).write(oprot);
     }
     xfer += oprot->writeListEnd();
   }
@@ -8083,10 +8182,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->row_groups.size()));
-    std::vector<RowGroup> ::const_iterator _iter304;
-    for (_iter304 = this->row_groups.begin(); _iter304 != this->row_groups.end(); ++_iter304)
+    std::vector<RowGroup> ::const_iterator _iter308;
+    for (_iter308 = this->row_groups.begin(); _iter308 != this->row_groups.end(); ++_iter308)
     {
-      xfer += (*_iter304).write(oprot);
+      xfer += (*_iter308).write(oprot);
     }
     xfer += oprot->writeListEnd();
   }
@@ -8096,10 +8195,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const
     xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
-      std::vector<KeyValue> ::const_iterator _iter305;
-      for (_iter305 = this->key_value_metadata.begin(); _iter305 != this->key_value_metadata.end(); ++_iter305)
+      std::vector<KeyValue> ::const_iterator _iter309;
+      for (_iter309 = this->key_value_metadata.begin(); _iter309 != this->key_value_metadata.end(); ++_iter309)
       {
-        xfer += (*_iter305).write(oprot);
+        xfer += (*_iter309).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -8114,10 +8213,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const
     xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->column_orders.size()));
-      std::vector<ColumnOrder> ::const_iterator _iter306;
-      for (_iter306 = this->column_orders.begin(); _iter306 != this->column_orders.end(); ++_iter306)
+      std::vector<ColumnOrder> ::const_iterator _iter310;
+      for (_iter310 = this->column_orders.begin(); _iter310 != this->column_orders.end(); ++_iter310)
       {
-        xfer += (*_iter306).write(oprot);
+        xfer += (*_iter310).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -8152,54 +8251,54 @@ void swap(FileMetaData &a, FileMetaData &b) {
   swap(a.__isset, b.__isset);
 }
 
-FileMetaData::FileMetaData(const FileMetaData& other307) {
-  version = other307.version;
-  schema = other307.schema;
-  num_rows = other307.num_rows;
-  row_groups = other307.row_groups;
-  key_value_metadata = other307.key_value_metadata;
-  created_by = other307.created_by;
-  column_orders = other307.column_orders;
-  encryption_algorithm = other307.encryption_algorithm;
-  footer_signing_key_metadata = other307.footer_signing_key_metadata;
-  __isset = other307.__isset;
-}
-FileMetaData::FileMetaData(FileMetaData&& other308) noexcept {
-  version = other308.version;
-  schema = std::move(other308.schema);
-  num_rows = other308.num_rows;
-  row_groups = std::move(other308.row_groups);
-  key_value_metadata = std::move(other308.key_value_metadata);
-  created_by = std::move(other308.created_by);
-  column_orders = std::move(other308.column_orders);
-  encryption_algorithm = std::move(other308.encryption_algorithm);
-  footer_signing_key_metadata = std::move(other308.footer_signing_key_metadata);
-  __isset = other308.__isset;
-}
-FileMetaData& FileMetaData::operator=(const FileMetaData& other309) {
-  version = other309.version;
-  schema = other309.schema;
-  num_rows = other309.num_rows;
-  row_groups = other309.row_groups;
-  key_value_metadata = other309.key_value_metadata;
-  created_by = other309.created_by;
-  column_orders = other309.column_orders;
-  encryption_algorithm = other309.encryption_algorithm;
-  footer_signing_key_metadata = other309.footer_signing_key_metadata;
-  __isset = other309.__isset;
+FileMetaData::FileMetaData(const FileMetaData& other311) {
+  version = other311.version;
+  schema = other311.schema;
+  num_rows = other311.num_rows;
+  row_groups = other311.row_groups;
+  key_value_metadata = other311.key_value_metadata;
+  created_by = other311.created_by;
+  column_orders = other311.column_orders;
+  encryption_algorithm = other311.encryption_algorithm;
+  footer_signing_key_metadata = other311.footer_signing_key_metadata;
+  __isset = other311.__isset;
+}
+FileMetaData::FileMetaData(FileMetaData&& other312) noexcept {
+  version = other312.version;
+  schema = std::move(other312.schema);
+  num_rows = other312.num_rows;
+  row_groups = std::move(other312.row_groups);
+  key_value_metadata = std::move(other312.key_value_metadata);
+  created_by = std::move(other312.created_by);
+  column_orders = std::move(other312.column_orders);
+  encryption_algorithm = std::move(other312.encryption_algorithm);
+  footer_signing_key_metadata = std::move(other312.footer_signing_key_metadata);
+  __isset = other312.__isset;
+}
+FileMetaData& FileMetaData::operator=(const FileMetaData& other313) {
+  version = other313.version;
+  schema = other313.schema;
+  num_rows = other313.num_rows;
+  row_groups = other313.row_groups;
+  key_value_metadata = other313.key_value_metadata;
+  created_by = other313.created_by;
+  column_orders = other313.column_orders;
+  encryption_algorithm = other313.encryption_algorithm;
+  footer_signing_key_metadata = other313.footer_signing_key_metadata;
+  __isset = other313.__isset;
   return *this;
 }
-FileMetaData& FileMetaData::operator=(FileMetaData&& other310) noexcept {
-  version = other310.version;
-  schema = std::move(other310.schema);
-  num_rows = other310.num_rows;
-  row_groups = std::move(other310.row_groups);
-  key_value_metadata = std::move(other310.key_value_metadata);
-  created_by = std::move(other310.created_by);
-  column_orders = std::move(other310.column_orders);
-  encryption_algorithm = std::move(other310.encryption_algorithm);
-  footer_signing_key_metadata = std::move(other310.footer_signing_key_metadata);
-  __isset = other310.__isset;
+FileMetaData& FileMetaData::operator=(FileMetaData&& other314) noexcept {
+  version = other314.version;
+  schema = std::move(other314.schema);
+  num_rows = other314.num_rows;
+  row_groups = std::move(other314.row_groups);
+  key_value_metadata = std::move(other314.key_value_metadata);
+  created_by = std::move(other314.created_by);
+  column_orders = std::move(other314.column_orders);
+  encryption_algorithm = std::move(other314.encryption_algorithm);
+  footer_signing_key_metadata = std::move(other314.footer_signing_key_metadata);
+  __isset = other314.__isset;
   return *this;
 }
 void FileMetaData::printTo(std::ostream& out) const {
@@ -8315,26 +8414,26 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) {
   swap(a.__isset, b.__isset);
 }
 
-FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other311) {
-  encryption_algorithm = other311.encryption_algorithm;
-  key_metadata = other311.key_metadata;
-  __isset = other311.__isset;
+FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other315) {
+  encryption_algorithm = other315.encryption_algorithm;
+  key_metadata = other315.key_metadata;
+  __isset = other315.__isset;
 }
-FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other312) noexcept {
-  encryption_algorithm = std::move(other312.encryption_algorithm);
-  key_metadata = std::move(other312.key_metadata);
-  __isset = other312.__isset;
+FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other316) noexcept {
+  encryption_algorithm = std::move(other316.encryption_algorithm);
+  key_metadata = std::move(other316.key_metadata);
+  __isset = other316.__isset;
 }
-FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other313) {
-  encryption_algorithm = other313.encryption_algorithm;
-  key_metadata = other313.key_metadata;
-  __isset = other313.__isset;
+FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other317) {
+  encryption_algorithm = other317.encryption_algorithm;
+  key_metadata = other317.key_metadata;
+  __isset = other317.__isset;
   return *this;
 }
-FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other314) noexcept {
-  encryption_algorithm = std::move(other314.encryption_algorithm);
-  key_metadata = std::move(other314.key_metadata);
-  __isset = other314.__isset;
+FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other318) noexcept {
+  encryption_algorithm = std::move(other318.encryption_algorithm);
+  key_metadata = std::move(other318.key_metadata);
+  __isset = other318.__isset;
   return *this;
 }
 void FileCryptoMetaData::printTo(std::ostream& out) const {
diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h
index 9f468b5051db3..199b4ae747667 100644
--- a/cpp/src/generated/parquet_types.h
+++ b/cpp/src/generated/parquet_types.h
@@ -359,6 +359,8 @@ class EnumType;
 
 class DateType;
 
+class Float16Type;
+
 class NullType;
 
 class DecimalType;
@@ -770,6 +772,39 @@ void swap(DateType &a, DateType &b);
 std::ostream& operator<<(std::ostream& out, const DateType& obj);
 
 
+class Float16Type : public virtual ::apache::thrift::TBase {
+ public:
+
+  Float16Type(const Float16Type&) noexcept;
+  Float16Type(Float16Type&&) noexcept;
+  Float16Type& operator=(const Float16Type&) noexcept;
+  Float16Type& operator=(Float16Type&&) noexcept;
+  Float16Type() noexcept {
+  }
+
+  virtual ~Float16Type() noexcept;
+
+  bool operator == (const Float16Type & /* rhs */) const
+  {
+    return true;
+  }
+  bool operator != (const Float16Type &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const Float16Type & ) const;
+
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override;
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override;
+
+  virtual void printTo(std::ostream& out) const;
+};
+
+void swap(Float16Type &a, Float16Type &b);
+
+std::ostream& operator<<(std::ostream& out, const Float16Type& obj);
+
+
 /**
  * Logical type to annotate a column that is always null.
  * 
@@ -1253,7 +1288,7 @@ void swap(BsonType &a, BsonType &b);
 std::ostream& operator<<(std::ostream& out, const BsonType& obj);
 
 typedef struct _LogicalType__isset {
-  _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false) {}
+  _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false), FLOAT16(false) {}
   bool STRING :1;
   bool MAP :1;
   bool LIST :1;
@@ -1267,6 +1302,7 @@ typedef struct _LogicalType__isset {
   bool JSON :1;
   bool BSON :1;
   bool UUID :1;
+  bool FLOAT16 :1;
 } _LogicalType__isset;
 
 /**
@@ -1300,6 +1336,7 @@ class LogicalType : public virtual ::apache::thrift::TBase {
   JsonType JSON;
   BsonType BSON;
   UUIDType UUID;
+  Float16Type FLOAT16;
 
   _LogicalType__isset __isset;
 
@@ -1329,6 +1366,8 @@ class LogicalType : public virtual ::apache::thrift::TBase {
 
   void __set_UUID(const UUIDType& val);
 
+  void __set_FLOAT16(const Float16Type& val);
+
   bool operator == (const LogicalType & rhs) const
   {
     if (__isset.STRING != rhs.__isset.STRING)
@@ -1383,6 +1422,10 @@ class LogicalType : public virtual ::apache::thrift::TBase {
       return false;
     else if (__isset.UUID && !(UUID == rhs.UUID))
       return false;
+    if (__isset.FLOAT16 != rhs.__isset.FLOAT16)
+      return false;
+    else if (__isset.FLOAT16 && !(FLOAT16 == rhs.FLOAT16))
+      return false;
     return true;
   }
   bool operator != (const LogicalType &rhs) const {
diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift
index 88e44c96cc24c..d802166be66e8 100644
--- a/cpp/src/parquet/parquet.thrift
+++ b/cpp/src/parquet/parquet.thrift
@@ -234,6 +234,7 @@ struct MapType {}     // see LogicalTypes.md
 struct ListType {}    // see LogicalTypes.md
 struct EnumType {}    // allowed for BINARY, must be encoded with UTF-8
 struct DateType {}    // allowed for INT32
+struct Float16Type{}  // allowed for FIXED[2], must encode raw FLOAT16 bytes
 
 /**
  * Logical type to annotate a column that is always null.
@@ -344,6 +345,7 @@ union LogicalType {
   12: JsonType JSON           // use ConvertedType JSON
   13: BsonType BSON           // use ConvertedType BSON
   14: UUIDType UUID           // no compatible ConvertedType
+  15: Float16Type FLOAT16     // no compatible ConvertedType
 }
 
 /**

From 0909cd14f3bc0cbadd2ce4f66cbde3a0fbe205b2 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Wed, 14 Jun 2023 14:50:15 -0400
Subject: [PATCH 02/37] Implement LogicalType class

---
 cpp/src/parquet/types.cc | 26 ++++++++++++++++++++++++++
 cpp/src/parquet/types.h  | 13 +++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index 3127b60e5d1ae..04a0fc2e0117b 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -441,6 +441,8 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(
     return BSONLogicalType::Make();
   } else if (type.__isset.UUID) {
     return UUIDLogicalType::Make();
+  } else if (type.__isset.FLOAT16) {
+    return Float16LogicalType::Make();
   } else {
     throw ParquetException("Metadata contains Thrift LogicalType that is not recognized");
   }
@@ -494,6 +496,10 @@ std::shared_ptr<const LogicalType> LogicalType::BSON() { return BSONLogicalType:
 
 std::shared_ptr<const LogicalType> LogicalType::UUID() { return UUIDLogicalType::Make(); }
 
+std::shared_ptr<const LogicalType> LogicalType::Float16() {
+  return Float16LogicalType::Make();  // forwards to the concrete class, as UUID() does
+}
+
 std::shared_ptr<const LogicalType> LogicalType::None() { return NoLogicalType::Make(); }
 
 /*
@@ -575,6 +581,7 @@ class LogicalType::Impl {
   class JSON;
   class BSON;
   class UUID;
+  class Float16;
   class No;
   class Undefined;
 
@@ -644,6 +651,9 @@ bool LogicalType::is_null() const { return impl_->type() == LogicalType::Type::N
 bool LogicalType::is_JSON() const { return impl_->type() == LogicalType::Type::JSON; }
 bool LogicalType::is_BSON() const { return impl_->type() == LogicalType::Type::BSON; }
 bool LogicalType::is_UUID() const { return impl_->type() == LogicalType::Type::UUID; }
+bool LogicalType::is_float16() const {
+  return impl_->type() == LogicalType::Type::FLOAT16;  // compares the impl's type id
+}
 bool LogicalType::is_none() const { return impl_->type() == LogicalType::Type::NONE; }
 bool LogicalType::is_valid() const {
   return impl_->type() != LogicalType::Type::UNDEFINED;
@@ -1557,6 +1567,22 @@ class LogicalType::Impl::UUID final : public LogicalType::Impl::Incompatible,
 
 GENERATE_MAKE(UUID)
 
+class LogicalType::Impl::Float16 final : public LogicalType::Impl::Incompatible,
+                                         public LogicalType::Impl::TypeLengthApplicable {
+ public:
+  friend class Float16LogicalType;  // lets that class reach the private constructor
+
+  OVERRIDE_TOSTRING(Float16)  // NOTE(review): macro body is outside this hunk
+  OVERRIDE_TOTHRIFT(Float16Type, FLOAT16)  // passes the Thrift struct and union field names
+
+ private:
+  Float16()  // FLOAT16 id, SIGNED sort order, FIXED_LEN_BYTE_ARRAY with length 2
+      : LogicalType::Impl(LogicalType::Type::FLOAT16, SortOrder::SIGNED),
+        LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 2) {}
+};
+
+GENERATE_MAKE(Float16)
+
 class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible,
                                     public LogicalType::Impl::UniversalApplicable {
  public:
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index 0315376a883e9..76dd0efc7cb4a 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -157,6 +157,7 @@ class PARQUET_EXPORT LogicalType {
       JSON,
       BSON,
       UUID,
+      FLOAT16,
       NONE  // Not a real logical type; should always be last element
     };
   };
@@ -210,6 +211,7 @@ class PARQUET_EXPORT LogicalType {
   static std::shared_ptr<const LogicalType> JSON();
   static std::shared_ptr<const LogicalType> BSON();
   static std::shared_ptr<const LogicalType> UUID();
+  static std::shared_ptr<const LogicalType> Float16();
 
   /// \brief Create a placeholder for when no logical type is specified
   static std::shared_ptr<const LogicalType> None();
@@ -263,6 +265,7 @@ class PARQUET_EXPORT LogicalType {
   bool is_JSON() const;
   bool is_BSON() const;
   bool is_UUID() const;
+  bool is_float16() const;
   bool is_none() const;
   /// \brief Return true if this logical type is of a known type.
   bool is_valid() const;
@@ -433,6 +436,16 @@ class PARQUET_EXPORT UUIDLogicalType : public LogicalType {
   UUIDLogicalType() = default;
 };
 
+/// \brief Half-precision float logical type: allowed only for physical type
+/// FIXED_LEN_BYTE_ARRAY with length 2, which must hold the raw FLOAT16 bytes.
+class PARQUET_EXPORT Float16LogicalType : public LogicalType {
+ public:
+  static std::shared_ptr<const LogicalType> Make();  // public factory for this type
+
+ private:
+  Float16LogicalType() = default;  // private: not constructible by outside code
+};
+
 /// \brief Allowed for any physical type.
 class PARQUET_EXPORT NoLogicalType : public LogicalType {
  public:

From 31135737ade20e40bc79594b4117d9fb77fdf796 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Wed, 14 Jun 2023 14:52:55 -0400
Subject: [PATCH 03/37] Implement column statistics

---
 cpp/src/parquet/float_internal.h   |  61 +++++
 cpp/src/parquet/statistics.cc      | 144 ++++++++++--
 cpp/src/parquet/statistics_test.cc | 343 +++++++++++++++++++++--------
 3 files changed, 442 insertions(+), 106 deletions(-)
 create mode 100644 cpp/src/parquet/float_internal.h

diff --git a/cpp/src/parquet/float_internal.h b/cpp/src/parquet/float_internal.h
new file mode 100644
index 0000000000000..c82c9d575ce3b
--- /dev/null
+++ b/cpp/src/parquet/float_internal.h
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/ubsan.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+struct float16 {  // bit-level helpers for 16-bit floats held as uint16_t or as 2 raw bytes
+  constexpr static uint16_t min() { return 0b1111101111111111; }  // 0xFBFF, sign bit set
+  constexpr static uint16_t max() { return 0b0111101111111111; }  // 0x7BFF, sign bit clear
+  constexpr static uint16_t positive_zero() { return 0b0000000000000000; }
+  constexpr static uint16_t negative_zero() { return 0b1000000000000000; }  // sign bit only
+
+  static uint8_t* min_ptr() { return min_; }  // 2-byte array forms of the constants above
+  static uint8_t* max_ptr() { return max_; }  // NOTE(review): non-const pointers to
+  static uint8_t* positive_zero_ptr() { return positive_zero_; }  // shared mutable arrays;
+  static uint8_t* negative_zero_ptr() { return negative_zero_; }  // callers must not write
+
+  static bool is_nan(uint16_t n) { return (n & 0x7c00) == 0x7c00 && (n & 0x03ff) != 0; }  // exponent all ones, mantissa non-zero
+  static bool is_zero(uint16_t n) { return (n & 0x7fff) == 0; }  // sign bit ignored
+  static bool signbit(uint16_t n) { return (n & 0x8000) != 0; }  // top bit is the sign
+
+  static uint16_t Pack(const uint8_t* src) {  // 2 bytes at src -> host-order uint16_t
+    return ::arrow::bit_util::FromLittleEndian(::arrow::util::SafeLoadAs<uint16_t>(src));
+  }
+  static uint16_t Pack(const FLBA& src) { return Pack(src.ptr); }  // src.ptr is dereferenced
+
+  static uint8_t* Unpack(uint16_t src, uint8_t* dest) {  // writes sizeof(src) bytes to dest
+    src = ::arrow::bit_util::ToLittleEndian(src);
+    return static_cast<uint8_t*>(std::memcpy(dest, &src, sizeof(src)));  // memcpy returns dest
+  }
+
+ private:
+  static inline uint8_t min_[] = {0b11111111, 0b11111011};  // low byte first: 0xFF, 0xFB
+  static inline uint8_t max_[] = {0b11111111, 0b01111011};  // low byte first: 0xFF, 0x7B
+  static inline uint8_t positive_zero_[] = {0b00000000, 0b00000000};
+  static inline uint8_t negative_zero_[] = {0b00000000, 0b10000000};  // 0x00, 0x80
+};
+
+}  // namespace parquet
diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index ccfb69c487d40..a3a486539710e 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -35,6 +35,7 @@
 #include "arrow/visit_data_inline.h"
 #include "parquet/encoding.h"
 #include "parquet/exception.h"
+#include "parquet/float_internal.h"
 #include "parquet/platform.h"
 #include "parquet/schema.h"
 
@@ -277,11 +278,54 @@ template <bool is_signed>
 struct CompareHelper<FLBAType, is_signed>
     : public BinaryLikeCompareHelperBase<FLBAType, is_signed> {};
 
+struct Float16CompareHelper {  // comparison policy for FLBA values holding 16-bit floats
+  using T = FLBA;
+
+  static T DefaultMin() { return T{float16::max_ptr()}; }  // starts at the max pattern
+  static T DefaultMax() { return T{float16::min_ptr()}; }  // starts at the min pattern
+
+  static T Coalesce(T val, T fallback) {  // returns fallback when val is a non-null NaN
+    return val.ptr != nullptr && float16::is_nan(float16::Pack(val)) ? fallback : val;
+  }
+
+  static inline bool Compare(int type_length, const T& a, const T& b) {  // true when a < b
+    uint16_t l = float16::Pack(a);  // type_length is not used; NaN is not special-cased
+    uint16_t r = float16::Pack(b);
+
+    if (l & 0x8000) {
+      if (r & 0x8000) {
+        // Both have the sign bit set: the larger magnitude is the smaller value
+        return (l & 0x7fff) > (r & 0x7fff);
+      } else {
+        // a is signed, b is not: a < b unless a is -0 and b is +0
+        return (l & 0x7fff) || r != 0;
+      }
+    } else if (r & 0x8000) {
+      return false;  // a has no sign bit, b does: a is never less than b
+    } else {
+      // Neither has the sign bit set: compare magnitudes directly
+      return (l & 0x7fff) < (r & 0x7fff);
+    }
+  }
+
+  static T Min(int type_length, const T& a, const T& b) {  // a null ptr yields the other
+    if (a.ptr == nullptr) return b;
+    if (b.ptr == nullptr) return a;
+    return Compare(type_length, a, b) ? a : b;  // b is returned when neither is less
+  }
+
+  static T Max(int type_length, const T& a, const T& b) {  // a null ptr yields the other
+    if (a.ptr == nullptr) return b;
+    if (b.ptr == nullptr) return a;
+    return Compare(type_length, a, b) ? b : a;  // a is returned when a is not less than b
+  }
+};
+
 using ::std::optional;
 
 template <typename T>
 ::arrow::enable_if_t<std::is_integral<T>::value, optional<std::pair<T, T>>>
-CleanStatistic(std::pair<T, T> min_max) {
+CleanStatistic(std::pair<T, T> min_max, LogicalType::Type::type) {
   return min_max;
 }
 
@@ -292,7 +336,7 @@ CleanStatistic(std::pair<T, T> min_max) {
 // - If max is -0.0f, replace with 0.0f
 template <typename T>
 ::arrow::enable_if_t<std::is_floating_point<T>::value, optional<std::pair<T, T>>>
-CleanStatistic(std::pair<T, T> min_max) {
+CleanStatistic(std::pair<T, T> min_max, LogicalType::Type::type) {
   T min = min_max.first;
   T max = min_max.second;
 
@@ -318,26 +362,55 @@ CleanStatistic(std::pair<T, T> min_max) {
   return {{min, max}};
 }
 
-optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max) {
+optional<std::pair<FLBA, FLBA>> CleanFloat16Statistic(std::pair<FLBA, FLBA> min_max) {
+  FLBA min = min_max.first;  // both ptrs must be non-null: Pack() reads through them
+  FLBA max = min_max.second;
+  uint16_t min_packed = float16::Pack(min);
+  uint16_t max_packed = float16::Pack(max);
+
+  if (float16::is_nan(min_packed) || float16::is_nan(max_packed)) {  // NaN bound: no stats
+    return ::std::nullopt;
+  }
+
+  if (min_packed == float16::max() && max_packed == float16::min()) {  // min=max(), max=min()
+    return ::std::nullopt;  // NOTE(review): looks like the untouched defaults - confirm
+  }
+
+  if (min_packed == float16::positive_zero()) {  // a +0 min is replaced by -0
+    min = FLBA{float16::negative_zero_ptr()};
+  }
+  if (max_packed == float16::negative_zero()) {  // a -0 max is replaced by +0
+    max = FLBA{float16::positive_zero_ptr()};
+  }
+
+  return {{min, max}};  // replaced bounds point at float16's static zero arrays
+}
+
+optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max,
+                                               LogicalType::Type::type logical_type) {
   if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
     return ::std::nullopt;
   }
+  if (logical_type == LogicalType::Type::FLOAT16) {  // only FLOAT16 gets extra cleaning
+    return CleanFloat16Statistic(std::move(min_max));  // ptrs are non-null at this point
+  }
   return min_max;
 }
 
 optional<std::pair<ByteArray, ByteArray>> CleanStatistic(
-    std::pair<ByteArray, ByteArray> min_max) {
+    std::pair<ByteArray, ByteArray> min_max, LogicalType::Type::type) {
   if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
     return ::std::nullopt;
   }
   return min_max;
 }
 
-template <bool is_signed, typename DType>
+template <bool is_signed, typename DType,
+          typename HelperType = CompareHelper<DType, is_signed>>
 class TypedComparatorImpl : virtual public TypedComparator<DType> {
  public:
   using T = typename DType::c_type;
-  using Helper = CompareHelper<DType, is_signed>;
+  using Helper = HelperType;
 
   explicit TypedComparatorImpl(int type_length = -1) : type_length_(type_length) {}
 
@@ -412,9 +485,9 @@ TypedComparatorImpl</*is_signed=*/false, Int32Type>::GetMinMax(const int32_t* va
   return {SafeCopy<int32_t>(min), SafeCopy<int32_t>(max)};
 }
 
-template <bool is_signed, typename DType>
+template <bool is_signed, typename DType, typename Helper>
 std::pair<typename DType::c_type, typename DType::c_type>
-TypedComparatorImpl<is_signed, DType>::GetMinMax(const ::arrow::Array& values) {
+TypedComparatorImpl<is_signed, DType, Helper>::GetMinMax(const ::arrow::Array& values) {
   ParquetException::NYI(values.type()->ToString());
 }
 
@@ -458,6 +531,16 @@ std::pair<ByteArray, ByteArray> TypedComparatorImpl<false, ByteArrayType>::GetMi
   return GetMinMaxBinaryHelper<false>(*this, values);
 }
 
+static LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) {
+  if (const auto& logical_type = descr->logical_type()) {  // descr itself is not checked
+    return logical_type->type();
+  }
+  return LogicalType::Type::NONE;  // fallback when logical_type() tests false
+}
+static LogicalType::Type::type LogicalTypeId(const Statistics& stats) {
+  return LogicalTypeId(stats.descr());  // delegates to the ColumnDescriptor* overload
+}
+
 template <typename DType>
 class TypedStatisticsImpl : public TypedStatistics<DType> {
  public:
@@ -469,8 +552,7 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
         pool_(pool),
         min_buffer_(AllocateBuffer(pool_, 0)),
         max_buffer_(AllocateBuffer(pool_, 0)) {
-    auto comp = Comparator::Make(descr);
-    comparator_ = std::static_pointer_cast<TypedComparator<DType>>(comp);
+    comparator_ = MakeComparator<DType>(descr);
     TypedStatisticsImpl::Reset();
   }
 
@@ -530,6 +612,19 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
   bool Equals(const Statistics& raw_other) const override {
     if (physical_type() != raw_other.physical_type()) return false;
 
+    const auto logical_id = LogicalTypeId(*this);
+    switch (logical_id) {
+      // Only compare against logical types that influence the interpretation of the
+      // physical type
+      case LogicalType::Type::FLOAT16:
+        if (LogicalTypeId(raw_other) != logical_id) {
+          return false;
+        }
+        break;
+      default:
+        break;
+    }
+
     const auto& other = checked_cast<const TypedStatisticsImpl&>(raw_other);
 
     if (has_min_max_ != other.has_min_max_) return false;
@@ -686,7 +781,7 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
 
   void SetMinMaxPair(std::pair<T, T> min_max) {
     // CleanStatistic can return a nullopt in case of erroneous values, e.g. NaN
-    auto maybe_min_max = CleanStatistic(min_max);
+    auto maybe_min_max = CleanStatistic(min_max, LogicalTypeId(*this));
     if (!maybe_min_max) return;
 
     auto min = maybe_min_max.value().first;
@@ -795,12 +890,8 @@ void TypedStatisticsImpl<ByteArrayType>::PlainDecode(const std::string& src,
   dst->ptr = reinterpret_cast<const uint8_t*>(src.c_str());
 }
 
-}  // namespace
-
-// ----------------------------------------------------------------------
-// Public factory functions
-
-std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
+std::shared_ptr<Comparator> DoMakeComparator(Type::type physical_type,
+                                             LogicalType::Type::type logical_type,
                                              SortOrder::type sort_order,
                                              int type_length) {
   if (SortOrder::SIGNED == sort_order) {
@@ -820,6 +911,10 @@ std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
       case Type::BYTE_ARRAY:
         return std::make_shared<TypedComparatorImpl<true, ByteArrayType>>();
       case Type::FIXED_LEN_BYTE_ARRAY:
+        if (logical_type == LogicalType::Type::FLOAT16) {
+          return std::make_shared<
+              TypedComparatorImpl<true, FLBAType, Float16CompareHelper>>();
+        }
         return std::make_shared<TypedComparatorImpl<true, FLBAType>>(type_length);
       default:
         ParquetException::NYI("Signed Compare not implemented");
@@ -845,8 +940,21 @@ std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
   return nullptr;
 }
 
+}  // namespace
+
+// ----------------------------------------------------------------------
+// Public factory functions
+
+std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
+                                             SortOrder::type sort_order,
+                                             int type_length) {
+  return DoMakeComparator(physical_type, LogicalType::Type::NONE, sort_order,
+                          type_length);  // NONE: this overload never selects FLOAT16
+}
+
 std::shared_ptr<Comparator> Comparator::Make(const ColumnDescriptor* descr) {
-  return Make(descr->physical_type(), descr->sort_order(), descr->type_length());
+  return DoMakeComparator(descr->physical_type(), LogicalTypeId(descr),
+                          descr->sort_order(), descr->type_length());
 }
 
 std::shared_ptr<Statistics> Statistics::Make(const ColumnDescriptor* descr,
diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index 637832945ec57..4dc77e771bded 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -40,6 +40,7 @@
 #include "parquet/column_writer.h"
 #include "parquet/file_reader.h"
 #include "parquet/file_writer.h"
+#include "parquet/float_internal.h"
 #include "parquet/platform.h"
 #include "parquet/schema.h"
 #include "parquet/statistics.h"
@@ -875,9 +876,22 @@ TEST(CorrectStatistics, Basics) {
 // Test SortOrder class
 static const int NUM_VALUES = 10;
 
-template <typename TestType>
+template <typename T>
+struct RebindLogical {  // primary template: T is used as the Parquet type unchanged
+  using ParquetType = T;
+  using CType = typename T::c_type;
+};
+
+template <>
+struct RebindLogical<Float16LogicalType> {  // the logical type tag maps to FLBAType
+  using ParquetType = FLBAType;
+  using CType = ParquetType::c_type;
+};
+
+template <typename T>
 class TestStatisticsSortOrder : public ::testing::Test {
  public:
+  using TestType = typename RebindLogical<T>::ParquetType;
   using c_type = typename TestType::c_type;
 
   void SetUp() override {
@@ -955,7 +969,7 @@ class TestStatisticsSortOrder : public ::testing::Test {
 };
 
 using CompareTestTypes = ::testing::Types<Int32Type, Int64Type, FloatType, DoubleType,
-                                          ByteArrayType, FLBAType>;
+                                          ByteArrayType, FLBAType, Float16LogicalType>;
 
 // TYPE::INT32
 template <>
@@ -1102,6 +1116,36 @@ void TestStatisticsSortOrder<FLBAType>::SetValues() {
       .set_max(std::string(reinterpret_cast<const char*>(&vals[8][0]), FLBA_LENGTH));
 }
 
+template <>
+void TestStatisticsSortOrder<Float16LogicalType>::AddNodes(std::string name) {
+  auto node =  // REQUIRED FLOAT16 column: FIXED_LEN_BYTE_ARRAY of sizeof(uint16_t) bytes
+      schema::PrimitiveNode::Make(name, Repetition::REQUIRED, LogicalType::Float16(),
+                                  Type::FIXED_LEN_BYTE_ARRAY, sizeof(uint16_t));
+  fields_.push_back(std::move(node));
+}
+
+template <>
+void TestStatisticsSortOrder<Float16LogicalType>::SetValues() {
+  constexpr int kValueLen = 2;  // bytes per value
+  constexpr int kNumBytes = NUM_VALUES * kValueLen;
+
+  const uint16_t packed_vals[NUM_VALUES] = {  // [4] and [8] satisfy float16::is_nan
+      0b0000000000000000, 0b0000000000000000, 0b1000000000000000, 0b1000010000000000,
+      0b0111110000001000, 0b1000000000000000, 0b0000010000000000, 0b0000000001000000,
+      0b1111110000001000, 0b1000000001000000};
+
+  values_buf_.resize(kNumBytes);
+  uint8_t* ptr = values_buf_.data();
+  for (int i = 0; i < NUM_VALUES; ++i) {
+    values_[i].ptr = float16::Unpack(packed_vals[i], ptr);  // Unpack returns its dest
+    ptr += kValueLen;
+  }
+
+  stats_[0]  // expected min is values_[3] (0x8400), expected max is values_[6] (0x0400)
+      .set_min(std::string(reinterpret_cast<const char*>(values_[3].ptr), kValueLen))
+      .set_max(std::string(reinterpret_cast<const char*>(values_[6].ptr), kValueLen));
+}
+
 TYPED_TEST_SUITE(TestStatisticsSortOrder, CompareTestTypes);
 
 TYPED_TEST(TestStatisticsSortOrder, MinMax) {
@@ -1167,12 +1211,20 @@ TEST_F(TestStatisticsSortOrderFLBA, UnknownSortOrder) {
   ASSERT_FALSE(cc_metadata->is_stats_set());
 }
 
+template <typename T>
+static std::string EncodeValue(const T& val) {  // copies the object bytes of val
+  return std::string(reinterpret_cast<const char*>(&val), sizeof(val));
+}
+static std::string EncodeValue(const FLBA& val, int length = sizeof(uint16_t)) {
+  return std::string(reinterpret_cast<const char*>(val.ptr), length);  // bytes at val.ptr
+}
+
 template <typename Stats, typename Array, typename T = typename Array::value_type>
 void AssertMinMaxAre(Stats stats, const Array& values, T expected_min, T expected_max) {
   stats->Update(values.data(), values.size(), 0);
   ASSERT_TRUE(stats->HasMinMax());
-  EXPECT_EQ(stats->min(), expected_min);
-  EXPECT_EQ(stats->max(), expected_max);
+  EXPECT_EQ(stats->EncodeMin(), EncodeValue(expected_min));
+  EXPECT_EQ(stats->EncodeMax(), EncodeValue(expected_max));
 }
 
 template <typename Stats, typename Array, typename T = typename Stats::T>
@@ -1184,8 +1236,8 @@ void AssertMinMaxAre(Stats stats, const Array& values, const uint8_t* valid_bitm
   stats->UpdateSpaced(values.data(), valid_bitmap, 0, non_null_count + null_count,
                       non_null_count, null_count);
   ASSERT_TRUE(stats->HasMinMax());
-  EXPECT_EQ(stats->min(), expected_min);
-  EXPECT_EQ(stats->max(), expected_max);
+  EXPECT_EQ(stats->EncodeMin(), EncodeValue(expected_min));
+  EXPECT_EQ(stats->EncodeMax(), EncodeValue(expected_max));
 }
 
 template <typename Stats, typename Array>
@@ -1268,50 +1320,217 @@ void CheckExtrema() {
 TEST(TestStatistic, Int32Extrema) { CheckExtrema<Int32Type>(); }
 TEST(TestStatistic, Int64Extrema) { CheckExtrema<Int64Type>(); }
 
-// PARQUET-1225: Float NaN values may lead to incorrect min-max
-template <typename ParquetType>
-void CheckNaNs() {
-  using T = typename ParquetType::c_type;
+template <typename T>
+class TestFloatStatistics : public ::testing::Test {
+ public:
+  using ParquetType = typename RebindLogical<T>::ParquetType;
+  using c_type = typename ParquetType::c_type;
+
+  void Init();  // sets positive_zero_ and negative_zero_; specialized per type
+  void SetUp() override { this->Init(); }
+
+  bool signbit(c_type val);
+  void CheckEq(const c_type& l, const c_type& r);
+  NodePtr MakeNode(const std::string& name, Repetition::type rep);
+
+  template <typename Stats, typename Values>
+  void CheckMinMaxZeroesSign(Stats stats, const Values& values) {
+    stats->Update(values.data(), values.size(), 0);
+    ASSERT_TRUE(stats->HasMinMax());
+
+    this->CheckEq(stats->min(), positive_zero_);  // min must be a zero...
+    ASSERT_TRUE(this->signbit(stats->min()));  // ...with the sign bit set
+
+    this->CheckEq(stats->max(), positive_zero_);  // max must be a zero...
+    ASSERT_FALSE(this->signbit(stats->max()));  // ...with the sign bit clear
+  }
+
+  // ARROW-5562: Ensure that -0.0f and 0.0f values are properly handled like in
+  // parquet-mr
+  void TestNegativeZeroes() {
+    NodePtr node = this->MakeNode("f", Repetition::OPTIONAL);
+    ColumnDescriptor descr(node, 1, 1);
 
+    {
+      std::array<c_type, 2> values{negative_zero_, positive_zero_};
+      auto stats = MakeStatistics<ParquetType>(&descr);
+      CheckMinMaxZeroesSign(stats, values);
+    }
+
+    {
+      std::array<c_type, 2> values{positive_zero_, negative_zero_};
+      auto stats = MakeStatistics<ParquetType>(&descr);
+      CheckMinMaxZeroesSign(stats, values);
+    }
+
+    {
+      std::array<c_type, 2> values{negative_zero_, negative_zero_};
+      auto stats = MakeStatistics<ParquetType>(&descr);
+      CheckMinMaxZeroesSign(stats, values);
+    }
+
+    {
+      std::array<c_type, 2> values{positive_zero_, positive_zero_};
+      auto stats = MakeStatistics<ParquetType>(&descr);
+      CheckMinMaxZeroesSign(stats, values);
+    }
+  }
+
+  // PARQUET-1225: Float NaN values may lead to incorrect min-max
+  template <typename Values>
+  void CheckNaNs(ColumnDescriptor* descr, const Values& all_nans, const Values& some_nans,
+                 const Values& other_nans, c_type min, c_type max, uint8_t valid_bitmap,
+                 uint8_t valid_bitmap_no_nans) {
+    auto some_nan_stats = MakeStatistics<ParquetType>(descr);
+    // Ingesting only nans should not yield valid min max
+    AssertUnsetMinMax(some_nan_stats, all_nans);
+    // Ingesting a mix of NaNs and non-NaNs should yield the expected min max.
+    AssertMinMaxAre(some_nan_stats, some_nans, min, max);
+    // Ingesting only nans after a valid min/max should have no effect
+    AssertMinMaxAre(some_nan_stats, all_nans, min, max);
+
+    some_nan_stats = MakeStatistics<ParquetType>(descr);
+    AssertUnsetMinMax(some_nan_stats, all_nans, &valid_bitmap);
+    // NaNs should not pollute min max when excluded via null bitmap.
+    AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap_no_nans, min, max);
+    // Ingesting NaNs with a null bitmap should not change the result.
+    AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap, min, max);
+
+    // An array that doesn't start with NaN
+    auto other_stats = MakeStatistics<ParquetType>(descr);
+    AssertMinMaxAre(other_stats, other_nans, min, max);
+  }
+
+  void TestNaNs();
+
+ protected:
+  std::vector<uint8_t> data_buf_;
+  c_type positive_zero_;  // assigned in Init()
+  c_type negative_zero_;  // assigned in Init()
+};
+
+template <typename T>
+void TestFloatStatistics<T>::Init() {
+  positive_zero_ = c_type{};  // value-initialized c_type
+  negative_zero_ = -positive_zero_;  // unary minus applied to that zero
+}
+template <>
+void TestFloatStatistics<Float16LogicalType>::Init() {
+  positive_zero_ = c_type{float16::positive_zero_ptr()};  // c_type wraps a byte pointer
+  negative_zero_ = c_type{float16::negative_zero_ptr()};  // into float16's static arrays
+}
+
+template <typename T>
+NodePtr TestFloatStatistics<T>::MakeNode(const std::string& name, Repetition::type rep) {
+  return PrimitiveNode::Make(name, rep, ParquetType::type_num);  // no logical type passed
+}
+template <>
+NodePtr TestFloatStatistics<Float16LogicalType>::MakeNode(const std::string& name,
+                                                          Repetition::type rep) {
+  return PrimitiveNode::Make(name, rep, LogicalType::Float16(),
+                             Type::FIXED_LEN_BYTE_ARRAY, 2);  // FLOAT16 on a 2-byte FLBA
+}
+
+template <typename T>
+void TestFloatStatistics<T>::CheckEq(const c_type& l, const c_type& r) {
+  ASSERT_EQ(l, r);  // uses c_type's own equality
+}
+template <>
+void TestFloatStatistics<Float16LogicalType>::CheckEq(const c_type& a, const c_type& b) {
+  auto l = float16::Pack(a);
+  auto r = float16::Pack(b);
+  if (float16::is_zero(l) && float16::is_zero(r)) return;  // +0 and -0 count as equal
+  ASSERT_EQ(l, r);  // otherwise the 16-bit patterns must match exactly
+}
+
+template <typename T>
+bool TestFloatStatistics<T>::signbit(c_type val) {
+  return std::signbit(val);
+}
+template <>
+bool TestFloatStatistics<Float16LogicalType>::signbit(c_type val) {
+  return float16::signbit(float16::Pack(val));
+}
+
+template <typename T>
+void TestFloatStatistics<T>::TestNaNs() {
   constexpr int kNumValues = 8;
-  NodePtr node = PrimitiveNode::Make("f", Repetition::OPTIONAL, ParquetType::type_num);
+  NodePtr node = this->MakeNode("f", Repetition::OPTIONAL);
   ColumnDescriptor descr(node, 1, 1);
 
-  constexpr T nan = std::numeric_limits<T>::quiet_NaN();
-  constexpr T min = -4.0f;
-  constexpr T max = 3.0f;
+  constexpr c_type nan = std::numeric_limits<c_type>::quiet_NaN();
+  constexpr c_type min = -4.0f;
+  constexpr c_type max = 3.0f;
+
+  std::array<c_type, kNumValues> all_nans{nan, nan, nan, nan, nan, nan, nan, nan};
+  std::array<c_type, kNumValues> some_nans{nan, max, -3.0f, -1.0f, nan, 2.0f, min, nan};
+  std::array<c_type, kNumValues> other_nans{1.5f, max, -3.0f, -1.0f, nan, 2.0f, min, nan};
 
-  std::array<T, kNumValues> all_nans{nan, nan, nan, nan, nan, nan, nan, nan};
-  std::array<T, kNumValues> some_nans{nan, max, -3.0f, -1.0f, nan, 2.0f, min, nan};
   uint8_t valid_bitmap = 0x7F;  // 0b01111111
   // NaNs excluded
   uint8_t valid_bitmap_no_nans = 0x6E;  // 0b01101110
 
-  // Test values
-  auto some_nan_stats = MakeStatistics<ParquetType>(&descr);
-  // Ingesting only nans should not yield valid min max
-  AssertUnsetMinMax(some_nan_stats, all_nans);
-  // Ingesting a mix of NaNs and non-NaNs should not yield valid min max.
-  AssertMinMaxAre(some_nan_stats, some_nans, min, max);
-  // Ingesting only nans after a valid min/max, should have not effect
-  AssertMinMaxAre(some_nan_stats, all_nans, min, max);
+  this->CheckNaNs(&descr, all_nans, some_nans, other_nans, min, max, valid_bitmap,
+                  valid_bitmap_no_nans);
+}
 
-  some_nan_stats = MakeStatistics<ParquetType>(&descr);
-  AssertUnsetMinMax(some_nan_stats, all_nans, &valid_bitmap);
-  // NaNs should not pollute min max when excluded via null bitmap.
-  AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap_no_nans, min, max);
-  // Ingesting NaNs with a null bitmap should not change the result.
-  AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap, min, max);
+template <>
+void TestFloatStatistics<Float16LogicalType>::TestNaNs() {
+  constexpr int kNumValues = 8;
+  constexpr int kValueLen = sizeof(uint16_t);
+
+  NodePtr node = this->MakeNode("f", Repetition::OPTIONAL);
+  ColumnDescriptor descr(node, 1, 1);
+
+  const uint16_t nan_int = 0b1111110010101010;
+  const uint16_t min_int = 0b1010010111000110;
+  const uint16_t max_int = 0b0011100011010011;
+  uint8_t min_max_data[2 * kValueLen];
+  const auto min = FLBA{float16::Unpack(min_int, &min_max_data[0 * kValueLen])};
+  const auto max = FLBA{float16::Unpack(max_int, &min_max_data[1 * kValueLen])};
+
+  std::array<uint16_t, kNumValues> all_nans_packed = {nan_int, nan_int, nan_int, nan_int,
+                                                      nan_int, nan_int, nan_int, nan_int};
+  std::array<uint16_t, kNumValues> some_nans_packed = {nan_int,
+                                                       max_int,
+                                                       0b1000111000110000,
+                                                       0b1000010001000001,
+                                                       nan_int,
+                                                       0b0000100000011110,
+                                                       min_int,
+                                                       nan_int};
+  std::array<uint16_t, kNumValues> other_nans_packed = some_nans_packed;
+  other_nans_packed[0] = 0b0000010000110011;
+
+  std::array<uint8_t, (kNumValues * kValueLen * 3)> bytes;
+  uint8_t* at = bytes.data();
+  auto prepare_values = [&](const auto& packed_values) -> std::vector<FLBA> {
+    std::vector<FLBA> out;
+    for (uint16_t packed : packed_values) {
+      out.push_back(FLBA{float16::Unpack(packed, at)});
+      at += kValueLen;
+    }
+    return out;
+  };
 
-  // An array that doesn't start with NaN
-  std::array<T, kNumValues> other_nans{1.5f, max, -3.0f, -1.0f, nan, 2.0f, min, nan};
-  auto other_stats = MakeStatistics<ParquetType>(&descr);
-  AssertMinMaxAre(other_stats, other_nans, min, max);
+  auto all_nans = prepare_values(all_nans_packed);
+  auto some_nans = prepare_values(some_nans_packed);
+  auto other_nans = prepare_values(other_nans_packed);
+
+  uint8_t valid_bitmap = 0x7F;  // 0b01111111
+  // NaNs excluded
+  uint8_t valid_bitmap_no_nans = 0x6E;  // 0b01101110
+
+  this->CheckNaNs(&descr, all_nans, some_nans, other_nans, min, max, valid_bitmap,
+                  valid_bitmap_no_nans);
 }
 
-TEST(TestStatistic, NaNFloatValues) { CheckNaNs<FloatType>(); }
+using FloatingPointTypes = ::testing::Types<FloatType, DoubleType, Float16LogicalType>;
+
+TYPED_TEST_SUITE(TestFloatStatistics, FloatingPointTypes);
 
-TEST(TestStatistic, NaNDoubleValues) { CheckNaNs<DoubleType>(); }
+TYPED_TEST(TestFloatStatistics, NegativeZeros) { this->TestNegativeZeroes(); }
+TYPED_TEST(TestFloatStatistics, NaNs) { this->TestNaNs(); }
 
 // ARROW-7376
 TEST(TestStatisticsSortOrderFloatNaN, NaNAndNullsInfiniteLoop) {
@@ -1327,58 +1546,6 @@ TEST(TestStatisticsSortOrderFloatNaN, NaNAndNullsInfiniteLoop) {
   AssertUnsetMinMax(stats, nans_but_last, &all_but_last_valid);
 }
 
-template <typename Stats, typename Array, typename T = typename Array::value_type>
-void AssertMinMaxZeroesSign(Stats stats, const Array& values) {
-  stats->Update(values.data(), values.size(), 0);
-  ASSERT_TRUE(stats->HasMinMax());
-
-  T zero{};
-  ASSERT_EQ(stats->min(), zero);
-  ASSERT_TRUE(std::signbit(stats->min()));
-
-  ASSERT_EQ(stats->max(), zero);
-  ASSERT_FALSE(std::signbit(stats->max()));
-}
-
-// ARROW-5562: Ensure that -0.0f and 0.0f values are properly handled like in
-// parquet-mr
-template <typename ParquetType>
-void CheckNegativeZeroStats() {
-  using T = typename ParquetType::c_type;
-
-  NodePtr node = PrimitiveNode::Make("f", Repetition::OPTIONAL, ParquetType::type_num);
-  ColumnDescriptor descr(node, 1, 1);
-  T zero{};
-
-  {
-    std::array<T, 2> values{-zero, zero};
-    auto stats = MakeStatistics<ParquetType>(&descr);
-    AssertMinMaxZeroesSign(stats, values);
-  }
-
-  {
-    std::array<T, 2> values{zero, -zero};
-    auto stats = MakeStatistics<ParquetType>(&descr);
-    AssertMinMaxZeroesSign(stats, values);
-  }
-
-  {
-    std::array<T, 2> values{-zero, -zero};
-    auto stats = MakeStatistics<ParquetType>(&descr);
-    AssertMinMaxZeroesSign(stats, values);
-  }
-
-  {
-    std::array<T, 2> values{zero, zero};
-    auto stats = MakeStatistics<ParquetType>(&descr);
-    AssertMinMaxZeroesSign(stats, values);
-  }
-}
-
-TEST(TestStatistics, FloatNegativeZero) { CheckNegativeZeroStats<FloatType>(); }
-
-TEST(TestStatistics, DoubleNegativeZero) { CheckNegativeZeroStats<DoubleType>(); }
-
 // Test statistics for binary column with UNSIGNED sort order
 TEST(TestStatisticsSortOrderMinMax, Unsigned) {
   std::string dir_string(test::get_data_dir());

From 046e967d98c9a2ca938a612f96816cad58c02b52 Mon Sep 17 00:00:00 2001
From: Ben Harkins <60872452+benibus@users.noreply.github.com>
Date: Thu, 15 Jun 2023 11:26:14 -0400
Subject: [PATCH 04/37] Apply suggestion from code review

Co-authored-by: Antoine Pitrou <pitrou@free.fr>
---
 cpp/src/parquet/statistics_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index 4dc77e771bded..d22a0bc681ade 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -1335,7 +1335,7 @@ class TestFloatStatistics : public ::testing::Test {
 
   template <typename Stats, typename Values>
   void CheckMinMaxZeroesSign(Stats stats, const Values& values) {
-    stats->Update(values.data(), values.size(), 0);
+    stats->Update(values.data(), values.size(), /*null_count=*/0);
     ASSERT_TRUE(stats->HasMinMax());
 
     this->CheckEq(stats->min(), positive_zero_);

From 66efa36f54f0c1982b409c0c1d2a1b1317d2818f Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Fri, 16 Jun 2023 17:19:55 -0400
Subject: [PATCH 05/37] Add Float16 utils to Arrow

---
 cpp/src/arrow/util/float16.h | 158 +++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 cpp/src/arrow/util/float16.h

diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
new file mode 100644
index 0000000000000..fedba0c29dc75
--- /dev/null
+++ b/cpp/src/arrow/util/float16.h
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <array>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <type_traits>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/ubsan.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace util {
+
+/// \brief Base class for an IEEE half-precision float, encoded as a `uint16_t`
+///
+/// The exact format is as follows (from MSB to LSB):
+/// - bit 0:     sign
+/// - bits 1-5:  exponent
+/// - bits 6-15: mantissa
+///
+/// NOTE: Methods in the class should not mutate the underlying value or produce copies.
+/// Such functionality is delegated to subclasses.
+class ARROW_EXPORT Float16Base {
+ public:
+  Float16Base() = default;
+  constexpr explicit Float16Base(uint16_t value) : value_(value) {}
+
+  constexpr uint16_t bits() const { return value_; }
+  constexpr explicit operator uint16_t() const { return bits(); }
+
+  constexpr bool signbit() const { return (value_ & 0x8000) != 0; }
+
+  constexpr bool is_nan() const {
+    return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0;
+  }
+  constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; }
+  constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; }
+
+  void ToBytes(uint8_t* dest) const {
+    auto value = bit_util::ToLittleEndian(value_);
+    std::memcpy(dest, &value, sizeof(value));
+  }
+  std::array<uint8_t, 2> ToBytes() const {
+    std::array<uint8_t, 2> bytes;
+    ToBytes(bytes.data());
+    return bytes;
+  }
+
+  friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) {
+    if (lhs.is_nan() || rhs.is_nan()) return false;
+    return Float16Base::CompareEq(lhs, rhs);
+  }
+  friend constexpr bool operator!=(Float16Base lhs, Float16Base rhs) {
+    return !(lhs == rhs);
+  }
+
+  friend constexpr bool operator<(Float16Base lhs, Float16Base rhs) {
+    if (lhs.is_nan() || rhs.is_nan()) return false;
+    return Float16Base::CompareLt(lhs, rhs);
+  }
+  friend constexpr bool operator>(Float16Base lhs, Float16Base rhs) { return rhs < lhs; }
+
+  friend constexpr bool operator<=(Float16Base lhs, Float16Base rhs) {
+    if (lhs.is_nan() || rhs.is_nan()) return false;
+    return !Float16Base::CompareLt(rhs, lhs);
+  }
+  friend constexpr bool operator>=(Float16Base lhs, Float16Base rhs) {
+    if (lhs.is_nan() || rhs.is_nan()) return false;
+    return !Float16Base::CompareLt(lhs, rhs);
+  }
+
+  friend std::ostream& operator<<(std::ostream& os, Float16Base arg) {
+    return (os << arg.bits());
+  }
+
+ protected:
+  uint16_t value_;
+
+ private:
+  // Comparison helpers that assume neither operand is NaN
+  static constexpr bool CompareEq(Float16Base lhs, Float16Base rhs) {
+    return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero());
+  }
+  static constexpr bool CompareLt(Float16Base lhs, Float16Base rhs) {
+    if (lhs.signbit()) {
+      if (rhs.signbit()) {
+        // Both are negative
+        return (lhs.bits() & 0x7fff) > (rhs.bits() & 0x7fff);
+      } else {
+        // Handle +/-0
+        return !lhs.is_zero() || rhs.bits() != 0;
+      }
+    } else if (rhs.signbit()) {
+      return false;
+    } else {
+      // Both are positive
+      return (lhs.bits() & 0x7fff) < (rhs.bits() & 0x7fff);
+    }
+  }
+};
+
+/// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t`
+class ARROW_EXPORT Float16 : public Float16Base {
+ public:
+  using Float16Base::Float16Base;
+
+  constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); }
+  constexpr Float16 operator+() const { return Float16(value_); }
+
+  static Float16 FromBytes(const uint8_t* src) {
+    return Float16(bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
+  }
+};
+
+static_assert(std::is_trivial_v<Float16>);
+
+}  // namespace util
+}  // namespace arrow
+
+// TODO: Not complete
+template <>
+class std::numeric_limits<arrow::util::Float16> {
+  using T = arrow::util::Float16;
+
+ public:
+  static constexpr bool is_specialized = true;
+  static constexpr bool is_signed = true;
+  static constexpr bool has_infinity = true;
+  static constexpr bool has_quiet_NaN = true;
+
+  static constexpr T min() { return T(0b0000010000000000); }
+  static constexpr T max() { return T(0b0111101111111111); }
+  static constexpr T lowest() { return -max(); }
+
+  static constexpr T infinity() { return T(0b0111110000000000); }
+
+  static constexpr T quiet_NaN() { return T(0b0111111111111111); }
+};

From e51d0d157db8768185ac7d1d97aa269253d2db5e Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Fri, 16 Jun 2023 17:27:59 -0400
Subject: [PATCH 06/37] Replace float_internal.h

---
 cpp/src/parquet/float_internal.h   |  61 ---------------
 cpp/src/parquet/statistics.cc      |  72 +++++++++--------
 cpp/src/parquet/statistics_test.cc | 120 ++++++++++++++++++-----------
 3 files changed, 114 insertions(+), 139 deletions(-)
 delete mode 100644 cpp/src/parquet/float_internal.h

diff --git a/cpp/src/parquet/float_internal.h b/cpp/src/parquet/float_internal.h
deleted file mode 100644
index c82c9d575ce3b..0000000000000
--- a/cpp/src/parquet/float_internal.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/ubsan.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-struct float16 {
-  constexpr static uint16_t min() { return 0b1111101111111111; }
-  constexpr static uint16_t max() { return 0b0111101111111111; }
-  constexpr static uint16_t positive_zero() { return 0b0000000000000000; }
-  constexpr static uint16_t negative_zero() { return 0b1000000000000000; }
-
-  static uint8_t* min_ptr() { return min_; }
-  static uint8_t* max_ptr() { return max_; }
-  static uint8_t* positive_zero_ptr() { return positive_zero_; }
-  static uint8_t* negative_zero_ptr() { return negative_zero_; }
-
-  static bool is_nan(uint16_t n) { return (n & 0x7c00) == 0x7c00 && (n & 0x03ff) != 0; }
-  static bool is_zero(uint16_t n) { return (n & 0x7fff) == 0; }
-  static bool signbit(uint16_t n) { return (n & 0x8000) != 0; }
-
-  static uint16_t Pack(const uint8_t* src) {
-    return ::arrow::bit_util::FromLittleEndian(::arrow::util::SafeLoadAs<uint16_t>(src));
-  }
-  static uint16_t Pack(const FLBA& src) { return Pack(src.ptr); }
-
-  static uint8_t* Unpack(uint16_t src, uint8_t* dest) {
-    src = ::arrow::bit_util::ToLittleEndian(src);
-    return static_cast<uint8_t*>(std::memcpy(dest, &src, sizeof(src)));
-  }
-
- private:
-  static inline uint8_t min_[] = {0b11111111, 0b11111011};
-  static inline uint8_t max_[] = {0b11111111, 0b01111011};
-  static inline uint8_t positive_zero_[] = {0b00000000, 0b00000000};
-  static inline uint8_t negative_zero_[] = {0b00000000, 0b10000000};
-};
-
-}  // namespace parquet
diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index a3a486539710e..f15d6664bf220 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -30,18 +30,19 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/ubsan.h"
 #include "arrow/visit_data_inline.h"
 #include "parquet/encoding.h"
 #include "parquet/exception.h"
-#include "parquet/float_internal.h"
 #include "parquet/platform.h"
 #include "parquet/schema.h"
 
 using arrow::default_memory_pool;
 using arrow::MemoryPool;
 using arrow::internal::checked_cast;
+using arrow::util::Float16;
 using arrow::util::SafeCopy;
 using arrow::util::SafeLoad;
 
@@ -54,6 +55,25 @@ namespace {
 constexpr int value_length(int value_length, const ByteArray& value) { return value.len; }
 constexpr int value_length(int type_length, const FLBA& value) { return type_length; }
 
+// Static "constants" for normalizing float16 min/max values. These need to be expressed
+// as pointers because `Float16LogicalType` represents an FLBA.
+const uint8_t* float16_lowest() {
+  static const auto bytes = std::numeric_limits<Float16>::lowest().ToBytes();
+  return bytes.data();
+}
+const uint8_t* float16_max() {
+  static const auto bytes = std::numeric_limits<Float16>::max().ToBytes();
+  return bytes.data();
+}
+const uint8_t* float16_positive_zero() {
+  static const auto bytes = Float16(0).ToBytes();
+  return bytes.data();
+}
+const uint8_t* float16_negative_zero() {
+  static const auto bytes = (-Float16(0)).ToBytes();
+  return bytes.data();
+}
+
 template <typename DType, bool is_signed>
 struct CompareHelper {
   using T = typename DType::c_type;
@@ -281,31 +301,18 @@ struct CompareHelper<FLBAType, is_signed>
 struct Float16CompareHelper {
   using T = FLBA;
 
-  static T DefaultMin() { return T{float16::max_ptr()}; }
-  static T DefaultMax() { return T{float16::min_ptr()}; }
+  static T DefaultMin() { return T{float16_max()}; }
+  static T DefaultMax() { return T{float16_lowest()}; }
 
   static T Coalesce(T val, T fallback) {
-    return val.ptr != nullptr && float16::is_nan(float16::Pack(val)) ? fallback : val;
+    return val.ptr != nullptr && Float16::FromBytes(val.ptr).is_nan() ? fallback : val;
   }
 
   static inline bool Compare(int type_length, const T& a, const T& b) {
-    uint16_t l = float16::Pack(a);
-    uint16_t r = float16::Pack(b);
-
-    if (l & 0x8000) {
-      if (r & 0x8000) {
-        // Both are negative
-        return (l & 0x7fff) > (r & 0x7fff);
-      } else {
-        // Handle +/-0
-        return (l & 0x7fff) || r != 0;
-      }
-    } else if (r & 0x8000) {
-      return false;
-    } else {
-      // Both are positive
-      return (l & 0x7fff) < (r & 0x7fff);
-    }
+    const auto lhs = Float16::FromBytes(a.ptr);
+    const auto rhs = Float16::FromBytes(b.ptr);
+    // NaN is handled here (same behavior as native float compare)
+    return lhs < rhs;
   }
 
   static T Min(int type_length, const T& a, const T& b) {
@@ -363,27 +370,28 @@ CleanStatistic(std::pair<T, T> min_max, LogicalType::Type::type) {
 }
 
 optional<std::pair<FLBA, FLBA>> CleanFloat16Statistic(std::pair<FLBA, FLBA> min_max) {
-  FLBA min = min_max.first;
-  FLBA max = min_max.second;
-  uint16_t min_packed = float16::Pack(min);
-  uint16_t max_packed = float16::Pack(max);
+  FLBA min_flba = min_max.first;
+  FLBA max_flba = min_max.second;
+  Float16 min = Float16::FromBytes(min_flba.ptr);
+  Float16 max = Float16::FromBytes(max_flba.ptr);
 
-  if (float16::is_nan(min_packed) || float16::is_nan(max_packed)) {
+  if (min.is_nan() || max.is_nan()) {
     return ::std::nullopt;
   }
 
-  if (min_packed == float16::max() && max_packed == float16::min()) {
+  if (min == std::numeric_limits<Float16>::max() &&
+      max == std::numeric_limits<Float16>::lowest()) {
     return ::std::nullopt;
   }
 
-  if (min_packed == float16::positive_zero()) {
-    min = FLBA{float16::negative_zero_ptr()};
+  if (min == Float16(0)) {
+    min_flba = FLBA{float16_negative_zero()};
   }
-  if (max_packed == float16::negative_zero()) {
-    max = FLBA{float16::positive_zero_ptr()};
+  if (max == -Float16(0)) {
+    max_flba = FLBA{float16_positive_zero()};
   }
 
-  return {{min, max}};
+  return {{min_flba, max_flba}};
 }
 
 optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max,
diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index d22a0bc681ade..24dac6cf2cd08 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -34,13 +34,13 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/ubsan.h"
 
 #include "parquet/column_reader.h"
 #include "parquet/column_writer.h"
 #include "parquet/file_reader.h"
 #include "parquet/file_writer.h"
-#include "parquet/float_internal.h"
 #include "parquet/platform.h"
 #include "parquet/schema.h"
 #include "parquet/statistics.h"
@@ -50,6 +50,7 @@
 
 using arrow::default_memory_pool;
 using arrow::MemoryPool;
+using arrow::util::Float16;
 using arrow::util::SafeCopy;
 
 namespace bit_util = arrow::bit_util;
@@ -62,6 +63,28 @@ using schema::PrimitiveNode;
 
 namespace test {
 
+class BufferedFloat16 : public ::arrow::util::Float16Base {
+ public:
+  explicit BufferedFloat16(Float16 f16) : Float16Base(f16) {
+    buffer_ = *::arrow::AllocateBuffer(sizeof(value_));
+    ToBytes(buffer_->mutable_data());
+  }
+  explicit BufferedFloat16(uint16_t value) : BufferedFloat16(Float16(value)) {}
+
+  const uint8_t* bytes() const { return buffer_->data(); }
+  const std::shared_ptr<::arrow::Buffer>& buffer() { return buffer_; }
+
+  BufferedFloat16 operator+() const { return *this; }
+  BufferedFloat16 operator-() const { return BufferedFloat16(value_ ^ 0x8000); }
+
+  static BufferedFloat16 FromBytes(const uint8_t* src) {
+    return BufferedFloat16(Float16::FromBytes(src));
+  }
+
+ private:
+  std::shared_ptr<::arrow::Buffer> buffer_;
+};
+
 // ----------------------------------------------------------------------
 // Test comparators
 
@@ -1129,21 +1152,30 @@ void TestStatisticsSortOrder<Float16LogicalType>::SetValues() {
   constexpr int kValueLen = 2;
   constexpr int kNumBytes = NUM_VALUES * kValueLen;
 
-  const uint16_t packed_vals[NUM_VALUES] = {
-      0b0000000000000000, 0b0000000000000000, 0b1000000000000000, 0b1000010000000000,
-      0b0111110000001000, 0b1000000000000000, 0b0000010000000000, 0b0000000001000000,
-      0b1111110000001000, 0b1000000001000000};
+  const uint16_t u16_vals[NUM_VALUES] = {
+      0b1100010100000000,  // -5.0
+      0b1100010000000000,  // -4.0
+      0b1100001000000000,  // -3.0
+      0b1100000000000000,  // -2.0
+      0b1011110000000000,  // -1.0
+      0b0000000000000000,  // +0.0
+      0b0011110000000000,  // +1.0
+      0b0100000000000000,  // +2.0
+      0b0100001000000000,  // +3.0
+      0b0100010000000000,  // +4.0
+  };
 
   values_buf_.resize(kNumBytes);
   uint8_t* ptr = values_buf_.data();
   for (int i = 0; i < NUM_VALUES; ++i) {
-    values_[i].ptr = float16::Unpack(packed_vals[i], ptr);
+    Float16(u16_vals[i]).ToBytes(ptr);
+    values_[i].ptr = ptr;
     ptr += kValueLen;
   }
 
   stats_[0]
-      .set_min(std::string(reinterpret_cast<const char*>(values_[3].ptr), kValueLen))
-      .set_max(std::string(reinterpret_cast<const char*>(values_[6].ptr), kValueLen));
+      .set_min(std::string(reinterpret_cast<const char*>(values_[0].ptr), kValueLen))
+      .set_max(std::string(reinterpret_cast<const char*>(values_[9].ptr), kValueLen));
 }
 
 TYPED_TEST_SUITE(TestStatisticsSortOrder, CompareTestTypes);
@@ -1416,8 +1448,11 @@ void TestFloatStatistics<T>::Init() {
 }
 template <>
 void TestFloatStatistics<Float16LogicalType>::Init() {
-  positive_zero_ = c_type{float16::positive_zero_ptr()};
-  negative_zero_ = c_type{float16::negative_zero_ptr()};
+  data_buf_.resize(4);
+  (+Float16(0)).ToBytes(&data_buf_[0]);
+  positive_zero_ = FLBA{&data_buf_[0]};
+  (-Float16(0)).ToBytes(&data_buf_[2]);
+  negative_zero_ = FLBA{&data_buf_[2]};
 }
 
 template <typename T>
@@ -1437,9 +1472,8 @@ void TestFloatStatistics<T>::CheckEq(const c_type& l, const c_type& r) {
 }
 template <>
 void TestFloatStatistics<Float16LogicalType>::CheckEq(const c_type& a, const c_type& b) {
-  auto l = float16::Pack(a);
-  auto r = float16::Pack(b);
-  if (float16::is_zero(l) && float16::is_zero(r)) return;
+  auto l = Float16::FromBytes(a.ptr);
+  auto r = Float16::FromBytes(b.ptr);
   ASSERT_EQ(l, r);
 }
 
@@ -1449,7 +1483,7 @@ bool TestFloatStatistics<T>::signbit(c_type val) {
 }
 template <>
 bool TestFloatStatistics<Float16LogicalType>::signbit(c_type val) {
-  return float16::signbit(float16::Pack(val));
+  return Float16::FromBytes(val.ptr).signbit();
 }
 
 template <typename T>
@@ -1477,45 +1511,39 @@ void TestFloatStatistics<T>::TestNaNs() {
 template <>
 void TestFloatStatistics<Float16LogicalType>::TestNaNs() {
   constexpr int kNumValues = 8;
-  constexpr int kValueLen = sizeof(uint16_t);
 
   NodePtr node = this->MakeNode("f", Repetition::OPTIONAL);
   ColumnDescriptor descr(node, 1, 1);
 
-  const uint16_t nan_int = 0b1111110010101010;
-  const uint16_t min_int = 0b1010010111000110;
-  const uint16_t max_int = 0b0011100011010011;
-  uint8_t min_max_data[2 * kValueLen];
-  const auto min = FLBA{float16::Unpack(min_int, &min_max_data[0 * kValueLen])};
-  const auto max = FLBA{float16::Unpack(max_int, &min_max_data[1 * kValueLen])};
-
-  std::array<uint16_t, kNumValues> all_nans_packed = {nan_int, nan_int, nan_int, nan_int,
-                                                      nan_int, nan_int, nan_int, nan_int};
-  std::array<uint16_t, kNumValues> some_nans_packed = {nan_int,
-                                                       max_int,
-                                                       0b1000111000110000,
-                                                       0b1000010001000001,
-                                                       nan_int,
-                                                       0b0000100000011110,
-                                                       min_int,
-                                                       nan_int};
-  std::array<uint16_t, kNumValues> other_nans_packed = some_nans_packed;
-  other_nans_packed[0] = 0b0000010000110011;
-
-  std::array<uint8_t, (kNumValues * kValueLen * 3)> bytes;
-  uint8_t* at = bytes.data();
-  auto prepare_values = [&](const auto& packed_values) -> std::vector<FLBA> {
-    std::vector<FLBA> out;
-    for (uint16_t packed : packed_values) {
-      out.push_back(FLBA{float16::Unpack(packed, at)});
-      at += kValueLen;
-    }
+  using F16 = BufferedFloat16;
+  const auto nan_f16 = F16(std::numeric_limits<Float16>::quiet_NaN());
+  const auto min_f16 = F16(0xc400);  // -4.0
+  const auto max_f16 = F16(0x4200);  // +3.0
+
+  const auto min = FLBA{min_f16.bytes()};
+  const auto max = FLBA{max_f16.bytes()};
+
+  std::array<F16, kNumValues> all_nans_f16 = {nan_f16, nan_f16, nan_f16, nan_f16,
+                                              nan_f16, nan_f16, nan_f16, nan_f16};
+  std::array<F16, kNumValues> some_nans_f16 = {nan_f16,     max_f16,
+                                               F16(0xc200),  // -3.0
+                                               F16(0xbc00),  // -1.0
+                                               nan_f16,
+                                               F16(0x4000),  // +2.0
+                                               min_f16,     nan_f16};
+  std::array<F16, kNumValues> other_nans_f16 = some_nans_f16;
+  other_nans_f16[0] = F16(0x3e00);  // +1.5
+
+  auto prepare_values = [](const auto& values) -> std::vector<FLBA> {
+    std::vector<FLBA> out(values.size());
+    std::transform(values.begin(), values.end(), out.begin(),
+                   [](const F16& f16) { return FLBA{f16.bytes()}; });
     return out;
   };
 
-  auto all_nans = prepare_values(all_nans_packed);
-  auto some_nans = prepare_values(some_nans_packed);
-  auto other_nans = prepare_values(other_nans_packed);
+  auto all_nans = prepare_values(all_nans_f16);
+  auto some_nans = prepare_values(some_nans_f16);
+  auto other_nans = prepare_values(other_nans_f16);
 
   uint8_t valid_bitmap = 0x7F;  // 0b01111111
   // NaNs excluded

From a2f72acbee57c20ecf584739f8c99b8d90bdbfcd Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Fri, 16 Jun 2023 17:51:46 -0400
Subject: [PATCH 07/37] Minor test tweaks

---
 cpp/src/parquet/statistics_test.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index 24dac6cf2cd08..ea285ab99179d 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -1359,7 +1359,10 @@ class TestFloatStatistics : public ::testing::Test {
   using c_type = typename ParquetType::c_type;
 
   void Init();
-  void SetUp() override { this->Init(); }
+  void SetUp() override {
+    this->Init();
+    ASSERT_NE(EncodeValue(negative_zero_), EncodeValue(positive_zero_));
+  }
 
   bool signbit(c_type val);
   void CheckEq(const c_type& l, const c_type& r);
@@ -1372,9 +1375,11 @@ class TestFloatStatistics : public ::testing::Test {
 
     this->CheckEq(stats->min(), positive_zero_);
     ASSERT_TRUE(this->signbit(stats->min()));
+    ASSERT_EQ(stats->EncodeMin(), EncodeValue(negative_zero_));
 
     this->CheckEq(stats->max(), positive_zero_);
     ASSERT_FALSE(this->signbit(stats->max()));
+    ASSERT_EQ(stats->EncodeMax(), EncodeValue(positive_zero_));
   }
 
   // ARROW-5562: Ensure that -0.0f and 0.0f values are properly handled like in
@@ -1416,9 +1421,9 @@ class TestFloatStatistics : public ::testing::Test {
     auto some_nan_stats = MakeStatistics<ParquetType>(descr);
     // Ingesting only nans should not yield valid min max
     AssertUnsetMinMax(some_nan_stats, all_nans);
-    // Ingesting a mix of NaNs and non-NaNs should not yield valid min max.
+    // Ingesting a mix of NaNs and non-NaNs should yield a valid min max.
     AssertMinMaxAre(some_nan_stats, some_nans, min, max);
-    // Ingesting only nans after a valid min/max, should have not effect
+    // Ingesting only nans after a valid min/max, should have no effect
     AssertMinMaxAre(some_nan_stats, all_nans, min, max);
 
     some_nan_stats = MakeStatistics<ParquetType>(descr);

From 1163b4e7708a48030e3057056df3b255815f68b4 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Sat, 17 Jun 2023 20:40:21 -0400
Subject: [PATCH 08/37] Add tests for Float16 operators

---
 cpp/src/arrow/util/CMakeLists.txt  |   1 +
 cpp/src/arrow/util/float16_test.cc | 135 +++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 cpp/src/arrow/util/float16_test.cc

diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 3dc8eac1abf64..2e9487dcf50c8 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -48,6 +48,7 @@ add_arrow_test(utility-test
                checked_cast_test.cc
                compression_test.cc
                decimal_test.cc
+               float16_test.cc
                formatting_util_test.cc
                key_value_metadata_test.cc
                hashing_test.cc
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
new file mode 100644
index 0000000000000..75ee9dc816b97
--- /dev/null
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <utility>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/float16.h"
+
+namespace arrow {
+namespace util {
+namespace {
+
+template <typename T>
+using Limits = std::numeric_limits<T>;
+
+// Holds a float16 and its equivalent float32
+struct TestValue {
+  TestValue(Float16 f16, float f32) : f16(f16), f32(f32) {}
+  TestValue(uint16_t u16, float f32) : TestValue(Float16(u16), f32) {}
+
+  Float16 f16;
+  float f32;
+};
+
+#define GENERATE_OPERATOR(NAME, OP)                              \
+  struct NAME {                                                  \
+    std::pair<bool, bool> operator()(TestValue l, TestValue r) { \
+      return std::make_pair((l.f32 OP r.f32), (l.f16 OP r.f16)); \
+    }                                                            \
+  }
+
+GENERATE_OPERATOR(CompareEq, ==);
+GENERATE_OPERATOR(CompareNe, !=);
+GENERATE_OPERATOR(CompareLt, <);
+GENERATE_OPERATOR(CompareGt, >);
+GENERATE_OPERATOR(CompareLe, <=);
+GENERATE_OPERATOR(CompareGe, >=);
+
+#undef GENERATE_OPERATOR
+
+const std::vector<TestValue> g_test_values = {
+    TestValue(Limits<Float16>::min(), +0.00006104f),
+    TestValue(Limits<Float16>::max(), +65504.0f),
+    TestValue(Limits<Float16>::lowest(), -65504.0f),
+    TestValue(+Limits<Float16>::infinity(), +Limits<float>::infinity()),
+    TestValue(-Limits<Float16>::infinity(), -Limits<float>::infinity()),
+    // Multiple (semantically equivalent) NaN representations
+    TestValue(0x7fff, Limits<float>::quiet_NaN()),
+    TestValue(0xffff, Limits<float>::quiet_NaN()),
+    TestValue(0x7e00, Limits<float>::quiet_NaN()),
+    TestValue(0xfe00, Limits<float>::quiet_NaN()),
+    // Positive/negative zeroes
+    TestValue(0x0000, +0.0f),
+    TestValue(0x8000, -0.0f),
+    // Miscellaneous values. In general, they're chosen to test the sign/exponent and
+    // exponent/mantissa boundaries
+    TestValue(0x101c, +0.000502f),
+    TestValue(0x901c, -0.000502f),
+    TestValue(0x101d, +0.0005022f),
+    TestValue(0x901d, -0.0005022f),
+    TestValue(0x121c, +0.000746f),
+    TestValue(0x921c, -0.000746f),
+    TestValue(0x141c, +0.001004f),
+    TestValue(0x941c, -0.001004f),
+    TestValue(0x501c, +32.9f),
+    TestValue(0xd01c, -32.9f),
+    // A few subnormals for good measure
+    TestValue(0x001c, +0.0000017f),
+    TestValue(0x801c, -0.0000017f),
+    TestValue(0x021c, +0.0000332f),
+    TestValue(0x821c, -0.0000332f),
+};
+
+template <typename Operator>
+class Float16OperatorTest : public ::testing::Test {
+ public:
+  void TestCompare(const std::vector<TestValue>& test_values) {
+    // Check all combinations of operands in both directions
+    for (size_t i = 0; i < test_values.size(); ++i) {
+      this->TestCompare(test_values, static_cast<int>(i));
+    }
+  }
+
+  void TestCompare(const std::vector<TestValue>& test_values, int offset) {
+    const auto num_values = static_cast<int>(test_values.size());
+    ASSERT_TRUE(offset >= 0 && offset < num_values);
+
+    int i = 0;
+    int j = offset;
+    while (i < num_values) {
+      ARROW_SCOPED_TRACE(i, ",", j);
+
+      auto a = test_values[i];
+      auto b = test_values[j];
+      std::pair<bool, bool> ret;
+
+      // Results for float16 and float32 should be the same
+      ret = Operator{}(a, b);
+      ASSERT_EQ(ret.first, ret.second);
+      ret = Operator{}(b, a);
+      ASSERT_EQ(ret.first, ret.second);
+
+      ++i;
+      j = (j + 1) % num_values;
+    }
+  }
+};
+
+using OperatorTypes =
+    ::testing::Types<CompareEq, CompareNe, CompareLt, CompareGt, CompareLe, CompareGe>;
+
+TYPED_TEST_SUITE(Float16OperatorTest, OperatorTypes);
+
+TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); }
+
+}  // namespace
+}  // namespace util
+}  // namespace arrow

From bc640ff62a5ee5c9079c9a3f233aabd02e1860b1 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Sat, 17 Jun 2023 21:55:15 -0400
Subject: [PATCH 09/37] Support multiple endians in Float16 class

---
 cpp/src/arrow/util/float16.h       | 37 ++++++++++++++++++++++++++----
 cpp/src/arrow/util/float16_test.cc | 33 ++++++++++++++++++++++++++
 cpp/src/parquet/statistics.cc      | 19 +++++++--------
 cpp/src/parquet/statistics_test.cc | 18 ++++++---------
 4 files changed, 82 insertions(+), 25 deletions(-)

diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index fedba0c29dc75..f2db88f3d3ea2 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -40,7 +40,7 @@ namespace util {
 ///
 /// NOTE: Methods in the class should not mutate the unerlying value or produce copies.
 /// Such functionality is delegated to subclasses.
-class ARROW_EXPORT Float16Base {
+class Float16Base {
  public:
   Float16Base() = default;
   constexpr explicit Float16Base(uint16_t value) : value_(value) {}
@@ -56,13 +56,32 @@ class ARROW_EXPORT Float16Base {
   constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; }
   constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; }
 
-  void ToBytes(uint8_t* dest) const {
+  /// \brief Copy the value's bytes in native-endian byte order
+  void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); }
+  /// \brief Return the value's bytes in native-endian byte order
+  std::array<uint8_t, 2> ToBytes() const {
+    std::array<uint8_t, 2> bytes;
+    ToBytes(bytes.data());
+    return bytes;
+  }
+
+  void ToLittleEndian(uint8_t* dest) const {
     auto value = bit_util::ToLittleEndian(value_);
     std::memcpy(dest, &value, sizeof(value));
   }
-  std::array<uint8_t, 2> ToBytes() const {
+  std::array<uint8_t, 2> ToLittleEndian() const {
     std::array<uint8_t, 2> bytes;
-    ToBytes(bytes.data());
+    ToLittleEndian(bytes.data());
+    return bytes;
+  }
+
+  void ToBigEndian(uint8_t* dest) const {
+    auto value = bit_util::ToBigEndian(value_);
+    std::memcpy(dest, &value, sizeof(value));
+  }
+  std::array<uint8_t, 2> ToBigEndian() const {
+    std::array<uint8_t, 2> bytes;
+    ToBigEndian(bytes.data());
     return bytes;
   }
 
@@ -120,16 +139,24 @@ class ARROW_EXPORT Float16Base {
 };
 
 /// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t`
-class ARROW_EXPORT Float16 : public Float16Base {
+class Float16 : public Float16Base {
  public:
   using Float16Base::Float16Base;
 
   constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); }
   constexpr Float16 operator+() const { return Float16(value_); }
 
+  /// \brief Read a `Float16` from memory in native-endian byte order
   static Float16 FromBytes(const uint8_t* src) {
+    return Float16(SafeLoadAs<uint16_t>(src));
+  }
+
+  static Float16 FromLittleEndian(const uint8_t* src) {
     return Float16(bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
   }
+  static Float16 FromBigEndian(const uint8_t* src) {
+    return Float16(bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
+  }
 };
 
 static_assert(std::is_trivial_v<Float16>);
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index 75ee9dc816b97..4e6bc64d5b6a6 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -15,13 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <array>
 #include <utility>
 #include <vector>
 
 #include <gtest/gtest.h>
 
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/endian.h"
 #include "arrow/util/float16.h"
+#include "arrow/util/ubsan.h"
 
 namespace arrow {
 namespace util {
@@ -130,6 +133,36 @@ TYPED_TEST_SUITE(Float16OperatorTest, OperatorTypes);
 
 TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); }
 
+TEST(Float16Test, ToBytes) {
+  constexpr auto f16 = Float16(0xd01c);
+  auto bytes = f16.ToBytes();
+  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
+#if ARROW_LITTLE_ENDIAN
+  bytes = f16.ToLittleEndian();
+  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
+  bytes = f16.ToBigEndian();
+  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0x1cd0);
+#else
+  bytes = f16.ToLittleEndian();
+  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0x1cd0);
+  bytes = f16.ToBigEndian();
+  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
+#endif
+}
+
+TEST(Float16Test, FromBytes) {
+  constexpr uint16_t u16 = 0xd01c;
+  const auto* data = reinterpret_cast<const uint8_t*>(&u16);
+  ASSERT_EQ(Float16::FromBytes(data), Float16(0xd01c));
+#if ARROW_LITTLE_ENDIAN
+  ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0xd01c));
+  ASSERT_EQ(Float16::FromBigEndian(data), Float16(0x1cd0));
+#else
+  ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0x1cd0));
+  ASSERT_EQ(Float16::FromBigEndian(data), Float16(0xd01c));
+#endif
+}
+
 }  // namespace
 }  // namespace util
 }  // namespace arrow
diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index f15d6664bf220..2af592bc011d2 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -58,19 +58,19 @@ constexpr int value_length(int type_length, const FLBA& value) { return type_len
 // Static "constants" for normalizing float16 min/max values. These need to be expressed
 // as pointers because `Float16LogicalType` represents an FLBA.
 const uint8_t* float16_lowest() {
-  static const auto bytes = std::numeric_limits<Float16>::lowest().ToBytes();
+  static const auto bytes = std::numeric_limits<Float16>::lowest().ToLittleEndian();
   return bytes.data();
 }
 const uint8_t* float16_max() {
-  static const auto bytes = std::numeric_limits<Float16>::max().ToBytes();
+  static const auto bytes = std::numeric_limits<Float16>::max().ToLittleEndian();
   return bytes.data();
 }
 const uint8_t* float16_positive_zero() {
-  static const auto bytes = Float16(0).ToBytes();
+  static const auto bytes = Float16(0).ToLittleEndian();
   return bytes.data();
 }
 const uint8_t* float16_negative_zero() {
-  static const auto bytes = (-Float16(0)).ToBytes();
+  static const auto bytes = (-Float16(0)).ToLittleEndian();
   return bytes.data();
 }
 
@@ -305,12 +305,13 @@ struct Float16CompareHelper {
   static T DefaultMax() { return T{float16_lowest()}; }
 
   static T Coalesce(T val, T fallback) {
-    return val.ptr != nullptr && Float16::FromBytes(val.ptr).is_nan() ? fallback : val;
+    return val.ptr != nullptr && Float16::FromLittleEndian(val.ptr).is_nan() ? fallback
+                                                                             : val;
   }
 
   static inline bool Compare(int type_length, const T& a, const T& b) {
-    const auto lhs = Float16::FromBytes(a.ptr);
-    const auto rhs = Float16::FromBytes(b.ptr);
+    const auto lhs = Float16::FromLittleEndian(a.ptr);
+    const auto rhs = Float16::FromLittleEndian(b.ptr);
     // NaN is handled here (same behavior as native float compare)
     return lhs < rhs;
   }
@@ -372,8 +373,8 @@ CleanStatistic(std::pair<T, T> min_max, LogicalType::Type::type) {
 optional<std::pair<FLBA, FLBA>> CleanFloat16Statistic(std::pair<FLBA, FLBA> min_max) {
   FLBA min_flba = min_max.first;
   FLBA max_flba = min_max.second;
-  Float16 min = Float16::FromBytes(min_flba.ptr);
-  Float16 max = Float16::FromBytes(max_flba.ptr);
+  Float16 min = Float16::FromLittleEndian(min_flba.ptr);
+  Float16 max = Float16::FromLittleEndian(max_flba.ptr);
 
   if (min.is_nan() || max.is_nan()) {
     return ::std::nullopt;
diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index ea285ab99179d..7de4e3f3840bf 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -67,7 +67,7 @@ class BufferedFloat16 : public ::arrow::util::Float16Base {
  public:
   explicit BufferedFloat16(Float16 f16) : Float16Base(f16) {
     buffer_ = *::arrow::AllocateBuffer(sizeof(value_));
-    ToBytes(buffer_->mutable_data());
+    ToLittleEndian(buffer_->mutable_data());
   }
   explicit BufferedFloat16(uint16_t value) : BufferedFloat16(Float16(value)) {}
 
@@ -77,10 +77,6 @@ class BufferedFloat16 : public ::arrow::util::Float16Base {
   BufferedFloat16 operator+() const { return *this; }
   BufferedFloat16 operator-() const { return BufferedFloat16(value_ ^ 0x8000); }
 
-  static BufferedFloat16 FromBytes(const uint8_t* src) {
-    return BufferedFloat16(Float16::FromBytes(src));
-  }
-
  private:
   std::shared_ptr<::arrow::Buffer> buffer_;
 };
@@ -1168,7 +1164,7 @@ void TestStatisticsSortOrder<Float16LogicalType>::SetValues() {
   values_buf_.resize(kNumBytes);
   uint8_t* ptr = values_buf_.data();
   for (int i = 0; i < NUM_VALUES; ++i) {
-    Float16(u16_vals[i]).ToBytes(ptr);
+    Float16(u16_vals[i]).ToLittleEndian(ptr);
     values_[i].ptr = ptr;
     ptr += kValueLen;
   }
@@ -1454,9 +1450,9 @@ void TestFloatStatistics<T>::Init() {
 template <>
 void TestFloatStatistics<Float16LogicalType>::Init() {
   data_buf_.resize(4);
-  (+Float16(0)).ToBytes(&data_buf_[0]);
+  (+Float16(0)).ToLittleEndian(&data_buf_[0]);
   positive_zero_ = FLBA{&data_buf_[0]};
-  (-Float16(0)).ToBytes(&data_buf_[2]);
+  (-Float16(0)).ToLittleEndian(&data_buf_[2]);
   negative_zero_ = FLBA{&data_buf_[2]};
 }
 
@@ -1477,8 +1473,8 @@ void TestFloatStatistics<T>::CheckEq(const c_type& l, const c_type& r) {
 }
 template <>
 void TestFloatStatistics<Float16LogicalType>::CheckEq(const c_type& a, const c_type& b) {
-  auto l = Float16::FromBytes(a.ptr);
-  auto r = Float16::FromBytes(b.ptr);
+  auto l = Float16::FromLittleEndian(a.ptr);
+  auto r = Float16::FromLittleEndian(b.ptr);
   ASSERT_EQ(l, r);
 }
 
@@ -1488,7 +1484,7 @@ bool TestFloatStatistics<T>::signbit(c_type val) {
 }
 template <>
 bool TestFloatStatistics<Float16LogicalType>::signbit(c_type val) {
-  return Float16::FromBytes(val.ptr).signbit();
+  return Float16::FromLittleEndian(val.ptr).signbit();
 }
 
 template <typename T>

From 2d7e65fa932b7db1426bb9f8bce90e063d4dcda3 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Sat, 17 Jun 2023 23:21:51 -0400
Subject: [PATCH 10/37] Simplify Float16 endian helpers and merge TestCompare overloads

---
 cpp/src/arrow/util/float16.h       | 14 +++-------
 cpp/src/arrow/util/float16_test.cc | 42 ++++++++++++------------------
 2 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index f2db88f3d3ea2..7959a29aa7900 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -66,23 +66,17 @@ class Float16Base {
   }
 
   void ToLittleEndian(uint8_t* dest) const {
-    auto value = bit_util::ToLittleEndian(value_);
-    std::memcpy(dest, &value, sizeof(value));
+    Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(dest);
   }
   std::array<uint8_t, 2> ToLittleEndian() const {
-    std::array<uint8_t, 2> bytes;
-    ToLittleEndian(bytes.data());
-    return bytes;
+    return Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes();
   }
 
   void ToBigEndian(uint8_t* dest) const {
-    auto value = bit_util::ToBigEndian(value_);
-    std::memcpy(dest, &value, sizeof(value));
+    Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(dest);
   }
   std::array<uint8_t, 2> ToBigEndian() const {
-    std::array<uint8_t, 2> bytes;
-    ToBigEndian(bytes.data());
-    return bytes;
+    return Float16Base{bit_util::ToBigEndian(value_)}.ToBytes();
   }
 
   friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) {
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index 4e6bc64d5b6a6..446d89c30a788 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -95,33 +95,25 @@ template <typename Operator>
 class Float16OperatorTest : public ::testing::Test {
  public:
   void TestCompare(const std::vector<TestValue>& test_values) {
-    // Check all combinations of operands in both directions
-    for (size_t i = 0; i < test_values.size(); ++i) {
-      this->TestCompare(test_values, static_cast<int>(i));
-    }
-  }
-
-  void TestCompare(const std::vector<TestValue>& test_values, int offset) {
     const auto num_values = static_cast<int>(test_values.size());
-    ASSERT_TRUE(offset >= 0 && offset < num_values);
 
-    int i = 0;
-    int j = offset;
-    while (i < num_values) {
-      ARROW_SCOPED_TRACE(i, ",", j);
-
-      auto a = test_values[i];
-      auto b = test_values[j];
-      std::pair<bool, bool> ret;
-
-      // Results for float16 and float32 should be the same
-      ret = Operator{}(a, b);
-      ASSERT_EQ(ret.first, ret.second);
-      ret = Operator{}(b, a);
-      ASSERT_EQ(ret.first, ret.second);
-
-      ++i;
-      j = (j + 1) % num_values;
+    // Check all combinations of operands in both directions
+    for (int offset = 0; offset < num_values; ++offset) {
+      int i = 0;
+      int j = offset;
+      while (i < num_values) {
+        ARROW_SCOPED_TRACE(i, ",", j);
+
+        auto a = test_values[i];
+        auto b = test_values[j];
+
+        // Results for float16 and float32 should be the same
+        auto ret = Operator{}(a, b);
+        ASSERT_EQ(ret.first, ret.second);
+
+        ++i;
+        j = (j + 1) % num_values;
+      }
     }
   }
 };

From 5e925ac62c5488c1b8acde78abd764c627babd1f Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Wed, 21 Jun 2023 14:23:29 -0400
Subject: [PATCH 11/37] Make Float16 byte accessors constexpr; tighten Float16 statistics

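---
Review note (text in this section is not part of the commit): on big-endian
hosts the constexpr array overloads added to float16.h below return the wrong
byte order. `value_ & 0xff` is the least-significant byte on every host, so
ToLittleEndian() must always return {value_ & 0xff, value_ >> 8} and
ToBigEndian() must always return {value_ >> 8, value_ & 0xff}; the
`#if ARROW_LITTLE_ENDIAN` switch inside them should be dropped. As written,
the array overloads disagree with the pointer overloads when
ARROW_LITTLE_ENDIAN is 0, and Float16Test.ToBytes in this patch would fail
there.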
 cpp/src/arrow/CMakeLists.txt       |  1 +
 cpp/src/arrow/util/float16.cc      | 28 +++++++++++
 cpp/src/arrow/util/float16.h       | 53 ++++++++++++++-------
 cpp/src/arrow/util/float16_test.cc | 40 +++++++++-------
 cpp/src/parquet/statistics.cc      | 74 ++++++++++++++++--------------
 5 files changed, 128 insertions(+), 68 deletions(-)
 create mode 100644 cpp/src/arrow/util/float16.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 101b089ba837f..24e8eefad1523 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -223,6 +223,7 @@ set(ARROW_SRCS
     util/debug.cc
     util/decimal.cc
     util/delimiting.cc
+    util/float16.cc
     util/formatting.cc
     util/future.cc
     util/hashing.cc
diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
new file mode 100644
index 0000000000000..825cbf0cb1fa3
--- /dev/null
+++ b/cpp/src/arrow/util/float16.cc
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <ostream>
+
+#include "arrow/util/float16.h"
+
+namespace arrow {
+namespace util {
+
+std::ostream& operator<<(std::ostream& os, Float16Base arg) { return (os << arg.bits()); }
+
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index 7959a29aa7900..74308a09a3cfd 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -18,13 +18,13 @@
 #pragma once
 
 #include <array>
-#include <cmath>
 #include <cstdint>
 #include <cstring>
+#include <iosfwd>
 #include <limits>
 #include <type_traits>
 
-#include "arrow/util/bit_util.h"
+#include "arrow/util/endian.h"
 #include "arrow/util/ubsan.h"
 #include "arrow/util/visibility.h"
 
@@ -45,38 +45,57 @@ class Float16Base {
   Float16Base() = default;
   constexpr explicit Float16Base(uint16_t value) : value_(value) {}
 
+  /// \brief Return the value's integer representation
   constexpr uint16_t bits() const { return value_; }
   constexpr explicit operator uint16_t() const { return bits(); }
 
+  /// \brief Return true if the value is negative (sign bit is set)
   constexpr bool signbit() const { return (value_ & 0x8000) != 0; }
 
+  /// \brief Return true if the value is NaN
   constexpr bool is_nan() const {
     return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0;
   }
+  /// \brief Return true if the value is positive/negative infinity
   constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; }
+  /// \brief Return true if the value is positive/negative zero
   constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; }
 
   /// \brief Copy the value's bytes in native-endian byte order
   void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); }
   /// \brief Return the value's bytes in native-endian byte order
-  std::array<uint8_t, 2> ToBytes() const {
-    std::array<uint8_t, 2> bytes;
-    ToBytes(bytes.data());
-    return bytes;
+  constexpr std::array<uint8_t, 2> ToBytes() const {
+#if ARROW_LITTLE_ENDIAN
+    return ToLittleEndian();
+#else
+    return ToBigEndian();
+#endif
   }
 
+  /// \brief Copy the value's bytes in little-endian byte order
   void ToLittleEndian(uint8_t* dest) const {
     Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(dest);
   }
-  std::array<uint8_t, 2> ToLittleEndian() const {
-    return Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes();
+  /// \brief Return the value's bytes in little-endian byte order
+  constexpr std::array<uint8_t, 2> ToLittleEndian() const {
+#if ARROW_LITTLE_ENDIAN
+    return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)};
+#else
+    return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)};
+#endif
   }
 
+  /// \brief Copy the value's bytes in big-endian byte order
   void ToBigEndian(uint8_t* dest) const {
     Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(dest);
   }
-  std::array<uint8_t, 2> ToBigEndian() const {
-    return Float16Base{bit_util::ToBigEndian(value_)}.ToBytes();
+  /// \brief Return the value's bytes in big-endian byte order
+  constexpr std::array<uint8_t, 2> ToBigEndian() const {
+#if ARROW_LITTLE_ENDIAN
+    return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)};
+#else
+    return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)};
+#endif
   }
 
   friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) {
@@ -98,13 +117,10 @@ class Float16Base {
     return !Float16Base::CompareLt(rhs, lhs);
   }
   friend constexpr bool operator>=(Float16Base lhs, Float16Base rhs) {
-    if (lhs.is_nan() || rhs.is_nan()) return false;
-    return !Float16Base::CompareLt(lhs, rhs);
+    return rhs <= lhs;
   }
 
-  friend std::ostream& operator<<(std::ostream& os, Float16Base arg) {
-    return (os << arg.bits());
-  }
+  ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16Base arg);
 
  protected:
   uint16_t value_;
@@ -118,7 +134,7 @@ class Float16Base {
     if (lhs.signbit()) {
       if (rhs.signbit()) {
         // Both are negative
-        return (lhs.bits() & 0x7fff) > (rhs.bits() & 0x7fff);
+        return lhs.bits() > rhs.bits();
       } else {
         // Handle +/-0
         return !lhs.is_zero() || rhs.bits() != 0;
@@ -127,7 +143,7 @@ class Float16Base {
       return false;
     } else {
       // Both are positive
-      return (lhs.bits() & 0x7fff) < (rhs.bits() & 0x7fff);
+      return lhs.bits() < rhs.bits();
     }
   }
 };
@@ -145,9 +161,12 @@ class Float16 : public Float16Base {
     return Float16(SafeLoadAs<uint16_t>(src));
   }
 
+  /// \brief Read a `Float16` from memory in little-endian byte order
   static Float16 FromLittleEndian(const uint8_t* src) {
     return Float16(bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
   }
+
+  /// \brief Read a `Float16` from memory in big-endian byte order
   static Float16 FromBigEndian(const uint8_t* src) {
     return Float16(bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
   }
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index 446d89c30a788..1ccb9db7b0e25 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -98,10 +98,8 @@ class Float16OperatorTest : public ::testing::Test {
     const auto num_values = static_cast<int>(test_values.size());
 
     // Check all combinations of operands in both directions
-    for (int offset = 0; offset < num_values; ++offset) {
-      int i = 0;
-      int j = offset;
-      while (i < num_values) {
+    for (int i = 0; i < num_values; ++i) {
+      for (int j = 0; j < num_values; ++j) {
         ARROW_SCOPED_TRACE(i, ",", j);
 
         auto a = test_values[i];
@@ -110,9 +108,6 @@ class Float16OperatorTest : public ::testing::Test {
         // Results for float16 and float32 should be the same
         auto ret = Operator{}(a, b);
         ASSERT_EQ(ret.first, ret.second);
-
-        ++i;
-        j = (j + 1) % num_values;
       }
     }
   }
@@ -127,19 +122,32 @@ TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); }
 
 TEST(Float16Test, ToBytes) {
   constexpr auto f16 = Float16(0xd01c);
-  auto bytes = f16.ToBytes();
-  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
+  std::array<uint8_t, 2> bytes;
+  auto load = [&bytes]() { return SafeLoadAs<uint16_t>(bytes.data()); };
+
+  // Test native-endian
+  f16.ToBytes(bytes.data());
+  ASSERT_EQ(load(), 0xd01c);
+  bytes = f16.ToBytes();
+  ASSERT_EQ(load(), 0xd01c);
+
 #if ARROW_LITTLE_ENDIAN
-  bytes = f16.ToLittleEndian();
-  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
-  bytes = f16.ToBigEndian();
-  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0x1cd0);
+  constexpr uint16_t expected_le = 0xd01c;
+  constexpr uint16_t expected_be = 0x1cd0;
 #else
+  constexpr uint16_t expected_le = 0x1cd0;
+  constexpr uint16_t expected_be = 0xd01c;
+#endif
+  // Test little-endian
+  f16.ToLittleEndian(bytes.data());
+  ASSERT_EQ(load(), expected_le);
   bytes = f16.ToLittleEndian();
-  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0x1cd0);
+  ASSERT_EQ(load(), expected_le);
+  // Test big-endian
+  f16.ToBigEndian(bytes.data());
+  ASSERT_EQ(load(), expected_be);
   bytes = f16.ToBigEndian();
-  ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
-#endif
+  ASSERT_EQ(load(), expected_be);
 }
 
 TEST(Float16Test, FromBytes) {
diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index 2af592bc011d2..a7691dd568796 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -57,22 +57,20 @@ constexpr int value_length(int type_length, const FLBA& value) { return type_len
 
 // Static "constants" for normalizing float16 min/max values. These need to be expressed
 // as pointers because `Float16LogicalType` represents an FLBA.
-const uint8_t* float16_lowest() {
-  static const auto bytes = std::numeric_limits<Float16>::lowest().ToLittleEndian();
-  return bytes.data();
-}
-const uint8_t* float16_max() {
-  static const auto bytes = std::numeric_limits<Float16>::max().ToLittleEndian();
-  return bytes.data();
-}
-const uint8_t* float16_positive_zero() {
-  static const auto bytes = Float16(0).ToLittleEndian();
-  return bytes.data();
-}
-const uint8_t* float16_negative_zero() {
-  static const auto bytes = (-Float16(0)).ToLittleEndian();
-  return bytes.data();
-}
+struct Float16Constants {
+  static constexpr const uint8_t* lowest() { return lowest_.data(); }
+  static constexpr const uint8_t* max() { return max_.data(); }
+  static constexpr const uint8_t* positive_zero() { return positive_zero_.data(); }
+  static constexpr const uint8_t* negative_zero() { return negative_zero_.data(); }
+
+ private:
+  using Bytes = std::array<uint8_t, 2>;
+  static constexpr Bytes lowest_ =
+      std::numeric_limits<Float16>::lowest().ToLittleEndian();
+  static constexpr Bytes max_ = std::numeric_limits<Float16>::max().ToLittleEndian();
+  static constexpr Bytes positive_zero_ = (+Float16(0)).ToLittleEndian();
+  static constexpr Bytes negative_zero_ = (-Float16(0)).ToLittleEndian();
+};
 
 template <typename DType, bool is_signed>
 struct CompareHelper {
@@ -301,12 +299,12 @@ struct CompareHelper<FLBAType, is_signed>
 struct Float16CompareHelper {
   using T = FLBA;
 
-  static T DefaultMin() { return T{float16_max()}; }
-  static T DefaultMax() { return T{float16_lowest()}; }
+  static T DefaultMin() { return T{Float16Constants::max()}; }
+  static T DefaultMax() { return T{Float16Constants::lowest()}; }
 
   static T Coalesce(T val, T fallback) {
-    return val.ptr != nullptr && Float16::FromLittleEndian(val.ptr).is_nan() ? fallback
-                                                                             : val;
+    return (val.ptr == nullptr || Float16::FromLittleEndian(val.ptr).is_nan()) ? fallback
+                                                                               : val;
   }
 
   static inline bool Compare(int type_length, const T& a, const T& b) {
@@ -386,10 +384,10 @@ optional<std::pair<FLBA, FLBA>> CleanFloat16Statistic(std::pair<FLBA, FLBA> min_
   }
 
   if (min == Float16(0)) {
-    min_flba = FLBA{float16_negative_zero()};
+    min_flba = FLBA{Float16Constants::negative_zero()};
   }
   if (max == -Float16(0)) {
-    max_flba = FLBA{float16_positive_zero()};
+    max_flba = FLBA{Float16Constants::positive_zero()};
   }
 
   return {{min_flba, max_flba}};
@@ -540,13 +538,13 @@ std::pair<ByteArray, ByteArray> TypedComparatorImpl<false, ByteArrayType>::GetMi
   return GetMinMaxBinaryHelper<false>(*this, values);
 }
 
-static LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) {
+LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) {
   if (const auto& logical_type = descr->logical_type()) {
     return logical_type->type();
   }
   return LogicalType::Type::NONE;
 }
-static LogicalType::Type::type LogicalTypeId(const Statistics& stats) {
+LogicalType::Type::type LogicalTypeId(const Statistics& stats) {
   return LogicalTypeId(stats.descr());
 }
 
@@ -618,20 +616,26 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
 
   void IncrementNumValues(int64_t n) override { num_values_ += n; }
 
+  static bool IsMeaningfulLogicalType(LogicalType::Type::type type) {
+    switch (type) {
+      case LogicalType::Type::FLOAT16:
+        return true;
+      default:
+        return false;
+    }
+  }
+
   bool Equals(const Statistics& raw_other) const override {
     if (physical_type() != raw_other.physical_type()) return false;
 
     const auto logical_id = LogicalTypeId(*this);
-    switch (logical_id) {
-      // Only compare against logical types that influence the interpretation of the
-      // physical type
-      case LogicalType::Type::FLOAT16:
-        if (LogicalTypeId(raw_other) != logical_id) {
-          return false;
-        }
-        break;
-      default:
-        break;
+    const auto other_logical_id = LogicalTypeId(raw_other);
+    // Only compare against logical types that influence the interpretation of the
+    // physical type
+    if (IsMeaningfulLogicalType(logical_id)) {
+      if (logical_id != other_logical_id) return false;
+    } else if (IsMeaningfulLogicalType(other_logical_id)) {
+      return false;
     }
 
     const auto& other = checked_cast<const TypedStatisticsImpl&>(raw_other);
@@ -922,7 +926,7 @@ std::shared_ptr<Comparator> DoMakeComparator(Type::type physical_type,
       case Type::FIXED_LEN_BYTE_ARRAY:
         if (logical_type == LogicalType::Type::FLOAT16) {
           return std::make_shared<
-              TypedComparatorImpl<true, FLBAType, Float16CompareHelper>>();
+              TypedComparatorImpl<true, FLBAType, Float16CompareHelper>>(type_length);
         }
         return std::make_shared<TypedComparatorImpl<true, FLBAType>>(type_length);
       default:

From 87d121c264db07a807bfe5b05b6efb9895552d42 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Sun, 9 Jul 2023 18:13:56 -0400
Subject: [PATCH 12/37] Support reading/writing `arrow::HalfFloat`

---
 .../parquet/arrow/arrow_reader_writer_test.cc | 33 +++++---
 cpp/src/parquet/arrow/arrow_schema_test.cc    | 18 +----
 cpp/src/parquet/arrow/reader_internal.cc      | 80 +++++++++++++++++++
 cpp/src/parquet/arrow/schema.cc               |  5 ++
 cpp/src/parquet/arrow/schema_internal.cc      |  2 +
 cpp/src/parquet/arrow/test_util.h             | 13 ++-
 cpp/src/parquet/column_writer.cc              | 71 ++++++++++++++++
 7 files changed, 195 insertions(+), 27 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 4e23d0fab5c69..8f1c64b81322b 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -143,6 +143,8 @@ std::shared_ptr<const LogicalType> get_logical_type(const DataType& type) {
       return LogicalType::Date();
     case ArrowId::DATE64:
       return LogicalType::Date();
+    case ArrowId::HALF_FLOAT:
+      return LogicalType::Float16();
     case ArrowId::TIMESTAMP: {
       const auto& ts_type = static_cast<const ::arrow::TimestampType&>(type);
       const bool adjusted_to_utc = !(ts_type.timezone().empty());
@@ -220,6 +222,7 @@ ParquetType::type get_physical_type(const DataType& type) {
     case ArrowId::FIXED_SIZE_BINARY:
     case ArrowId::DECIMAL128:
     case ArrowId::DECIMAL256:
+    case ArrowId::HALF_FLOAT:
       return ParquetType::FIXED_LEN_BYTE_ARRAY;
     case ArrowId::DATE32:
       return ParquetType::INT32;
@@ -525,6 +528,9 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const DataType& type,
           byte_width =
               static_cast<const ::arrow::FixedSizeBinaryType&>(values_type).byte_width();
           break;
+        case ::arrow::Type::HALF_FLOAT:
+          byte_width = sizeof(::arrow::HalfFloatType::c_type);
+          break;
         case ::arrow::Type::DECIMAL128:
         case ::arrow::Type::DECIMAL256: {
           const auto& decimal_type = static_cast<const DecimalType&>(values_type);
@@ -537,6 +543,9 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const DataType& type,
     case ::arrow::Type::FIXED_SIZE_BINARY:
       byte_width = static_cast<const ::arrow::FixedSizeBinaryType&>(type).byte_width();
       break;
+    case ::arrow::Type::HALF_FLOAT:
+      byte_width = sizeof(::arrow::HalfFloatType::c_type);
+      break;
     case ::arrow::Type::DECIMAL128:
     case ::arrow::Type::DECIMAL256: {
       const auto& decimal_type = static_cast<const DecimalType&>(type);
@@ -840,12 +849,12 @@ typedef ::testing::Types<
     ::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type, ::arrow::UInt16Type,
     ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type, ::arrow::Int64Type,
     ::arrow::Date32Type, ::arrow::FloatType, ::arrow::DoubleType, ::arrow::StringType,
-    ::arrow::BinaryType, ::arrow::FixedSizeBinaryType, DecimalWithPrecisionAndScale<1>,
-    DecimalWithPrecisionAndScale<5>, DecimalWithPrecisionAndScale<10>,
-    DecimalWithPrecisionAndScale<19>, DecimalWithPrecisionAndScale<23>,
-    DecimalWithPrecisionAndScale<27>, DecimalWithPrecisionAndScale<38>,
-    Decimal256WithPrecisionAndScale<39>, Decimal256WithPrecisionAndScale<56>,
-    Decimal256WithPrecisionAndScale<76>>
+    ::arrow::BinaryType, ::arrow::FixedSizeBinaryType, ::arrow::HalfFloatType,
+    DecimalWithPrecisionAndScale<1>, DecimalWithPrecisionAndScale<5>,
+    DecimalWithPrecisionAndScale<10>, DecimalWithPrecisionAndScale<19>,
+    DecimalWithPrecisionAndScale<23>, DecimalWithPrecisionAndScale<27>,
+    DecimalWithPrecisionAndScale<38>, Decimal256WithPrecisionAndScale<39>,
+    Decimal256WithPrecisionAndScale<56>, Decimal256WithPrecisionAndScale<76>>
     TestTypes;
 
 TYPED_TEST_SUITE(TestParquetIO, TestTypes);
@@ -916,9 +925,15 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) {
 }
 
 TYPED_TEST(TestParquetIO, SingleColumnOptionalDictionaryWrite) {
-  // Skip tests for BOOL as we don't create dictionaries for it.
-  if (TypeParam::type_id == ::arrow::Type::BOOL) {
-    return;
+  switch (TypeParam::type_id) {
+    // Skip tests for BOOL as we don't create dictionaries for it.
+    case ::arrow::Type::BOOL:
+    // Skip tests for HALF_FLOAT as it's not currently supported by `dictionary_encode`
+    case ::arrow::Type::HALF_FLOAT:
+      GTEST_SKIP();
+      break;
+    default:
+      break;
   }
 
   std::shared_ptr<Array> values;
diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc
index f11101eb24298..a1cc989ba8ea0 100644
--- a/cpp/src/parquet/arrow/arrow_schema_test.cc
+++ b/cpp/src/parquet/arrow/arrow_schema_test.cc
@@ -851,6 +851,8 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
        ParquetType::FIXED_LEN_BYTE_ARRAY, 7},
       {"decimal(32, 8)", ::arrow::decimal(32, 8), LogicalType::Decimal(32, 8),
        ParquetType::FIXED_LEN_BYTE_ARRAY, 14},
+      {"float16", ::arrow::float16(), LogicalType::Float16(),
+       ParquetType::FIXED_LEN_BYTE_ARRAY, 2},
       {"time32", ::arrow::time32(::arrow::TimeUnit::MILLI),
        LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), ParquetType::INT32, -1},
       {"time64(microsecond)", ::arrow::time64(::arrow::TimeUnit::MICRO),
@@ -906,22 +908,6 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
   // ASSERT_NO_FATAL_FAILURE();
 }
 
-TEST_F(TestConvertArrowSchema, ArrowNonconvertibleFields) {
-  struct FieldConstructionArguments {
-    std::string name;
-    std::shared_ptr<::arrow::DataType> datatype;
-  };
-
-  std::vector<FieldConstructionArguments> cases = {
-      {"float16", ::arrow::float16()},
-  };
-
-  for (const FieldConstructionArguments& c : cases) {
-    auto field = ::arrow::field(c.name, c.datatype);
-    ASSERT_RAISES(NotImplemented, ConvertSchema({field}));
-  }
-}
-
 TEST_F(TestConvertArrowSchema, ParquetFlatPrimitivesAsDictionaries) {
   std::vector<NodePtr> parquet_fields;
   std::vector<std::shared_ptr<Field>> arrow_fields;
diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc
index 5146aa12c2c36..f4e3a89e71a31 100644
--- a/cpp/src/parquet/arrow/reader_internal.cc
+++ b/cpp/src/parquet/arrow/reader_internal.cc
@@ -42,6 +42,7 @@
 #include "arrow/util/bit_util.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/endian.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/int_util_overflow.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/ubsan.h"
@@ -82,6 +83,7 @@ using ::arrow::bit_util::FromBigEndian;
 using ::arrow::internal::checked_cast;
 using ::arrow::internal::checked_pointer_cast;
 using ::arrow::internal::SafeLeftShift;
+using ::arrow::util::Float16;
 using ::arrow::util::SafeLoadAs;
 
 using parquet::internal::BinaryRecordReader;
@@ -713,6 +715,77 @@ Status TransferDecimal(RecordReader* reader, MemoryPool* pool,
   return Status::OK();
 }
 
+static inline Status ConvertToHalfFloat(const Array& array,
+                                        const std::shared_ptr<DataType>& type,
+                                        MemoryPool* pool, std::shared_ptr<Array>* out) {
+  constexpr int32_t byte_width = sizeof(uint16_t);
+  DCHECK_EQ(checked_cast<const ::arrow::HalfFloatType&>(*type).byte_width(), byte_width);
+
+  // We read the halffloat (uint16_t) bytes from a raw binary array, in which they're
+  // assumed to be little-endian.
+  const auto& binary_array = checked_cast<const ::arrow::FixedSizeBinaryArray&>(array);
+  DCHECK_EQ(checked_cast<const ::arrow::FixedSizeBinaryType&>(*binary_array.type())
+                .byte_width(),
+            byte_width);
+
+  // Number of elements in the halffloat array
+  const int64_t length = binary_array.length();
+  // Allocate data for the output halffloat array
+  ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * byte_width, pool));
+  uint8_t* out_ptr = data->mutable_data();
+
+  const int64_t null_count = binary_array.null_count();
+  // Copy the values to the output array in native-endian format
+  if (null_count > 0) {
+    for (int64_t i = 0; i < length; ++i, out_ptr += byte_width) {
+      Float16 f16{0};
+      if (binary_array.IsValid(i)) {
+        const uint8_t* in_ptr = binary_array.GetValue(i);
+        f16 = Float16::FromLittleEndian(in_ptr);
+      }
+      f16.ToBytes(out_ptr);
+    }
+  } else {
+#if ARROW_LITTLE_ENDIAN
+    // No need to byte-swap, so do a simple copy
+    std::memcpy(out_ptr, binary_array.raw_values(), length * byte_width);
+#else
+    for (int64_t i = 0; i < length; ++i, out_ptr += byte_width) {
+      const uint8_t* in_ptr = binary_array.GetValue(i);
+      Float16::FromLittleEndian(in_ptr).ToBytes(out_ptr);
+    }
+#endif
+  }
+
+  *out = std::make_shared<::arrow::HalfFloatArray>(
+      type, length, std::move(data), binary_array.null_bitmap(), null_count);
+  return Status::OK();
+}
+
+/// \brief Convert an arrow::BinaryArray to an arrow::HalfFloatArray
+/// We do this by:
+/// 1. Creating an arrow::BinaryArray from the RecordReader's builder
+/// 2. Allocating a buffer for the arrow::HalfFloatArray
+/// 3. Converting the little-endian bytes in each BinaryArray entry to native-endian
+/// halffloat (uint16_t) values
+Status TransferHalfFloat(RecordReader* reader, MemoryPool* pool,
+                         const std::shared_ptr<Field>& field, Datum* out) {
+  auto binary_reader = dynamic_cast<BinaryRecordReader*>(reader);
+  DCHECK(binary_reader);
+  ::arrow::ArrayVector chunks = binary_reader->GetBuilderChunks();
+  for (size_t i = 0; i < chunks.size(); ++i) {
+    std::shared_ptr<Array> chunk_as_half;
+    RETURN_NOT_OK(ConvertToHalfFloat(*chunks[i], field->type(), pool, &chunk_as_half));
+    // Replace the chunk, which will hopefully also free memory as we go
+    chunks[i] = chunk_as_half;
+  }
+  if (!field->nullable()) {
+    ReconstructChunksWithoutNulls(&chunks);
+  }
+  *out = std::make_shared<ChunkedArray>(chunks, field->type());
+  return Status::OK();
+}
+
 }  // namespace
 
 #define TRANSFER_INT32(ENUM, ArrowType)                                               \
@@ -772,6 +845,13 @@ Status TransferColumnData(RecordReader* reader, const std::shared_ptr<Field>& va
       RETURN_NOT_OK(TransferBinary(reader, pool, value_field, &chunked_result));
       result = chunked_result;
     } break;
+    case ::arrow::Type::HALF_FLOAT: {
+      if (descr->physical_type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+        return Status::Invalid("Physical type for ", value_field->type()->ToString(),
+                               " must be fixed length binary");
+      }
+      RETURN_NOT_OK(TransferHalfFloat(reader, pool, value_field, &result));
+    } break;
     case ::arrow::Type::DECIMAL128: {
       switch (descr->physical_type()) {
         case ::parquet::Type::INT32: {
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 3323b7ff8b608..f5484f131eb07 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -397,6 +397,11 @@ Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
     case ArrowTypeId::DURATION:
       type = ParquetType::INT64;
       break;
+    case ArrowTypeId::HALF_FLOAT:
+      type = ParquetType::FIXED_LEN_BYTE_ARRAY;
+      logical_type = LogicalType::Float16();
+      length = sizeof(uint16_t);
+      break;
     case ArrowTypeId::STRUCT: {
       auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type());
       return StructToNode(struct_type, name, field->nullable(), field_id, properties,
diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc
index da0427cb31000..bb75cce084097 100644
--- a/cpp/src/parquet/arrow/schema_internal.cc
+++ b/cpp/src/parquet/arrow/schema_internal.cc
@@ -130,6 +130,8 @@ Result<std::shared_ptr<ArrowType>> FromFLBA(const LogicalType& logical_type,
   switch (logical_type.type()) {
     case LogicalType::Type::DECIMAL:
       return MakeArrowDecimal(logical_type);
+    case LogicalType::Type::FLOAT16:
+      return ::arrow::float16();
     case LogicalType::Type::NONE:
     case LogicalType::Type::INTERVAL:
     case LogicalType::Type::UUID:
diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h
index 16c03130c9672..16a0d24a22497 100644
--- a/cpp/src/parquet/arrow/test_util.h
+++ b/cpp/src/parquet/arrow/test_util.h
@@ -201,8 +201,17 @@ ::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
     size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<Array>* out) {
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
-  ::arrow::random_real(size, seed, static_cast<c_type>(-1e10), static_cast<c_type>(1e10),
-                       &values);
+  if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
+    std::vector<int16_t> signed_values;
+    constexpr int16_t min = 0xf0e2;  // -1e4
+    constexpr int16_t max = 0x70e2;  // +1e4
+    ::arrow::randint(size, min, max, &signed_values);
+    std::transform(signed_values.begin(), signed_values.end(), std::back_inserter(values),
+                   [](int16_t v) { return static_cast<uint16_t>(v); });
+  } else {
+    ::arrow::random_real(size, seed, static_cast<c_type>(-1e10),
+                         static_cast<c_type>(1e10), &values);
+  }
   std::vector<uint8_t> valid_bytes(size, 1);
 
   for (size_t i = 0; i < num_nulls; i++) {
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index 5dff533c1cce2..715432d005492 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -39,6 +39,7 @@
 #include "arrow/util/compression.h"
 #include "arrow/util/crc32.h"
 #include "arrow/util/endian.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/rle_encoding.h"
 #include "arrow/util/type_traits.h"
@@ -65,6 +66,7 @@ using arrow::Status;
 using arrow::bit_util::BitWriter;
 using arrow::internal::checked_cast;
 using arrow::internal::checked_pointer_cast;
+using arrow::util::Float16;
 using arrow::util::RleEncoder;
 
 namespace bit_util = arrow::bit_util;
@@ -2295,6 +2297,74 @@ struct SerializeFunctor<
   int64_t* scratch;
 };
 
+// ----------------------------------------------------------------------
+// Write Arrow to Float16
+
+// Requires a custom serializer because Float16s in Parquet are stored as a 2-byte
+// (little-endian) FLBA, whereas in Arrow they're a native `uint16_t`. Also, a temporary
+// buffer is needed if there's an endian mismatch.
+template <>
+struct SerializeFunctor<::parquet::FLBAType, ::arrow::HalfFloatType> {
+  Status Serialize(const ::arrow::HalfFloatArray& array, ArrowWriteContext* ctx,
+                   FLBA* out) {
+#if ARROW_LITTLE_ENDIAN
+    return SerializeInPlace(array, ctx, out);
+#else
+    return SerializeWithScratch(array, ctx, out);
+#endif
+  }
+
+  Status SerializeInPlace(const ::arrow::HalfFloatArray& array, ArrowWriteContext*,
+                          FLBA* out) {
+    const uint16_t* values = array.raw_values();
+    if (array.null_count() == 0) {
+      for (int64_t i = 0; i < array.length(); ++i) {
+        out[i] = ToFLBA(&values[i]);
+      }
+    } else {
+      for (int64_t i = 0; i < array.length(); ++i) {
+        out[i] = array.IsValid(i) ? ToFLBA(&values[i]) : FLBA{};
+      }
+    }
+    return Status::OK();
+  }
+
+  Status SerializeWithScratch(const ::arrow::HalfFloatArray& array,
+                              ArrowWriteContext* ctx, FLBA* out) {
+    AllocateScratch(array, ctx);
+    if (array.null_count() == 0) {
+      for (int64_t i = 0; i < array.length(); ++i) {
+        out[i] = ToFLBA(array.Value(i));
+      }
+    } else {
+      for (int64_t i = 0; i < array.length(); ++i) {
+        out[i] = array.IsValid(i) ? ToFLBA(array.Value(i)) : FLBA{};
+      }
+    }
+    return Status::OK();
+  }
+
+ private:
+  FLBA ToFLBA(const uint16_t* value_ptr) const {
+    return FLBA{reinterpret_cast<const uint8_t*>(value_ptr)};
+  }
+  FLBA ToFLBA(uint16_t value) {
+    auto* out = reinterpret_cast<uint8_t*>(scratch_++);
+    Float16(value).ToLittleEndian(out);
+    return FLBA{out};
+  }
+
+  void AllocateScratch(const ::arrow::HalfFloatArray& array, ArrowWriteContext* ctx) {
+    int64_t non_null_count = array.length() - array.null_count();
+    int64_t size = non_null_count * sizeof(uint16_t);
+    scratch_buffer_ = AllocateBuffer(ctx->memory_pool, size);
+    scratch_ = reinterpret_cast<uint16_t*>(scratch_buffer_->mutable_data());
+  }
+
+  std::shared_ptr<ResizableBuffer> scratch_buffer_;
+  uint16_t* scratch_;
+};
+
 template <>
 Status TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
     const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
@@ -2303,6 +2373,7 @@ Status TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
     WRITE_SERIALIZE_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryType, FLBAType)
     WRITE_SERIALIZE_CASE(DECIMAL128, Decimal128Type, FLBAType)
     WRITE_SERIALIZE_CASE(DECIMAL256, Decimal256Type, FLBAType)
+    WRITE_SERIALIZE_CASE(HALF_FLOAT, HalfFloatType, FLBAType)
     default:
       break;
   }

From a064bec3bcd80901c79a38a66e9775da4673c16f Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Mon, 10 Jul 2023 17:44:20 -0400
Subject: [PATCH 13/37] Fix MSVC truncation warning

---
 cpp/src/parquet/arrow/test_util.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h
index 16a0d24a22497..1ca221c0fb8f6 100644
--- a/cpp/src/parquet/arrow/test_util.h
+++ b/cpp/src/parquet/arrow/test_util.h
@@ -203,8 +203,8 @@ ::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
   std::vector<c_type> values;
   if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
     std::vector<int16_t> signed_values;
-    constexpr int16_t min = 0xf0e2;  // -1e4
-    constexpr int16_t max = 0x70e2;  // +1e4
+    constexpr auto min = static_cast<int16_t>(0xf0e2);  // -1e4
+    constexpr auto max = static_cast<int16_t>(0x70e2);  // +1e4
     ::arrow::randint(size, min, max, &signed_values);
     std::transform(signed_values.begin(), signed_values.end(), std::back_inserter(values),
                    [](int16_t v) { return static_cast<uint16_t>(v); });

From 6b3d61ca99f00284e9af63a77afef006ca928c91 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Mon, 17 Jul 2023 21:01:33 -0400
Subject: [PATCH 14/37] Fix test input generation

---
 cpp/src/parquet/arrow/test_util.h | 43 ++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h
index 1ca221c0fb8f6..74b8a36df8592 100644
--- a/cpp/src/parquet/arrow/test_util.h
+++ b/cpp/src/parquet/arrow/test_util.h
@@ -65,12 +65,44 @@ struct Decimal256WithPrecisionAndScale {
   static constexpr int32_t scale = PRECISION - 1;
 };
 
+inline std::vector<uint16_t> RandomHalfFloatValues(size_t size, uint16_t min,
+                                                   uint16_t max) {
+  auto to_signed = [](uint16_t in) -> int16_t {
+    // Clamp magnitude to 0x7bff (max finite) to exclude NaN/infinity. Within this range,
+    // binary float16s have the same ordering as int16s after conversion.
+    int16_t out = static_cast<int16_t>(std::min(in & 0x7fff, 0x7bff));
+    // Negate if sign bit is set
+    return (in & 0x8000) != 0 ? -out : out;
+  };
+  auto to_unsigned = [](int16_t in) -> uint16_t {
+    uint16_t out = static_cast<uint16_t>(std::abs(in));
+    // Set sign bit if negative
+    return in < 0 ? (out | 0x8000) : out;
+  };
+
+  const auto signed_min = to_signed(min);
+  const auto signed_max = to_signed(max);
+  std::vector<int16_t> signed_values;
+  ::arrow::randint(size, signed_min, signed_max, &signed_values);
+
+  std::vector<uint16_t> values(signed_values.size());
+  std::transform(signed_values.begin(), signed_values.end(), values.begin(), to_unsigned);
+  return values;
+}
+
 template <class ArrowType>
 ::arrow::enable_if_floating_point<ArrowType, Status> NonNullArray(
     size_t size, std::shared_ptr<Array>* out) {
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
-  ::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1), &values);
+  if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
+    constexpr uint16_t min = 0x0000;  // 0.0
+    constexpr uint16_t max = 0x3c00;  // 1.0
+    values = RandomHalfFloatValues(size, min, max);
+  } else {
+    ::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1),
+                         &values);
+  }
   ::arrow::NumericBuilder<ArrowType> builder;
   RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
   return builder.Finish(out);
@@ -202,12 +234,9 @@ ::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
   if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
-    std::vector<int16_t> signed_values;
-    constexpr auto min = static_cast<int16_t>(0xf0e2);  // -1e4
-    constexpr auto max = static_cast<int16_t>(0x70e2);  // +1e4
-    ::arrow::randint(size, min, max, &signed_values);
-    std::transform(signed_values.begin(), signed_values.end(), std::back_inserter(values),
-                   [](int16_t v) { return static_cast<uint16_t>(v); });
+    constexpr uint16_t min = 0xf0e2;  // -1e4
+    constexpr uint16_t max = 0x70e2;  // +1e4
+    values = RandomHalfFloatValues(size, min, max);
   } else {
     ::arrow::random_real(size, seed, static_cast<c_type>(-1e10),
                          static_cast<c_type>(1e10), &values);

From 17581057a9accdcd8583329926bac842533745f3 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Mon, 21 Aug 2023 19:04:36 -0400
Subject: [PATCH 15/37] Support conversions to/from float32

---
 cpp/src/arrow/util/float16.cc      | 163 ++++++++++++++++++++++++++++-
 cpp/src/arrow/util/float16.h       |   8 +-
 cpp/src/arrow/util/float16_test.cc |  91 ++++++++++++++++
 3 files changed, 259 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
index 825cbf0cb1fa3..47e90f1ba050a 100644
--- a/cpp/src/arrow/util/float16.cc
+++ b/cpp/src/arrow/util/float16.cc
@@ -18,11 +18,172 @@
 #include <ostream>
 
 #include "arrow/util/float16.h"
+#include "arrow/util/ubsan.h"
 
 namespace arrow {
 namespace util {
 
-std::ostream& operator<<(std::ostream& os, Float16Base arg) { return (os << arg.bits()); }
+namespace {
+
+// --------------------------------------------------------
+// Binary conversions
+// --------------------------------------------------------
+// These routines are partially adapted from Numpy's C implementation
+//
+// Some useful metrics for conversions between different precisions:
+// |-----------------------------------------|
+// | precision | half    | single  | double  |
+// |-----------------------------------------|
+// | mantissa  | 10 bits | 23 bits | 52 bits |
+// | exponent  | 5 bits  | 8 bits  | 11 bits |
+// | sign      | 1 bit   | 1 bit   | 1 bit   |
+// | exp bias  | 15      | 127     | 1023    |
+// |-----------------------------------------|
+
+// Converts an IEEE binary32 into a binary16. Rounds to nearest with ties to even
+uint16_t Binary32BitsToBinary16Bits(uint32_t f_bits) {
+  // Sign mask for output binary16
+  const uint16_t h_sign = uint16_t((f_bits >> 16) & 0x8000);
+
+  // Exponent mask for input binary32
+  const uint32_t f_exp = f_bits & 0x7f800000u;
+  // Exponents as signed pre-shifted values for convenience. Here, we need to re-bias the
+  // binary32 exponent for a binary16. If, after re-biasing, the binary16 exponent falls
+  // outside of the range [1,30] then we need to handle the under/overflow case specially.
+  const int16_t f_biased_exp = int16_t(f_exp >> 23);
+  const int16_t unbiased_exp = f_biased_exp - 127;
+  const int16_t h_biased_exp = unbiased_exp + 15;
+
+  // Mantissa mask for input binary32
+  const uint32_t f_mant = f_bits & 0x007fffffu;
+
+  // Handle exponent overflow, NaN, and +/-Inf
+  if (h_biased_exp >= 0x1f) {
+    // The binary32 is a NaN representation
+    if (f_biased_exp == 0xff && f_mant != 0) {
+      uint16_t h_mant = uint16_t(f_mant >> 13);
+      // If the mantissa bit(s) indicating NaN were shifted out, add one back. Otherwise,
+      // the result would be infinity.
+      if (h_mant == 0) {
+        h_mant = 0x1;
+      }
+      return uint16_t(h_sign | 0x7c00u | h_mant);
+    }
+
+    // Clamp to +/-infinity
+    return uint16_t(h_sign | 0x7c00u);
+  }
+
+  // Handle exponent underflow, subnormals, and +/-0
+  if (h_biased_exp <= 0) {
+    // If the underflow exceeds the number of bits in a binary16 mantissa (10) then we
+    // can't round, so just clamp to 0. Note that this also weeds out any binary32 values
+    // that are subnormal - including +/-0;
+    if (h_biased_exp < -10) {
+      return h_sign;
+    }
+
+    // Convert to a rounded subnormal value starting with the mantissa. Since the input
+    // binary32 is known to be normal at this point, we need to prepend its implicit
+    // leading bit - which also necessitates an additional right-shift.
+    uint32_t rounded_mant = 0x800000u | f_mant;
+    rounded_mant >>= (1 - h_biased_exp);
+
+    // Here, we implement rounding to nearest (with ties to even)
+    //
+    // By now, our new mantissa has two conceptual ranges:
+    //  - The lower 13 bits, which will be shifted out
+    //  - The upper 10 bits, which will become the binary16's mantissa
+    //
+    // We define a "rounding bit", which is the most significant bit to be dropped
+    // (0x1000). "Rounding to nearest" basically just means that we add 1 to the rounding
+    // bit. If it's set, then the bit will cascade upwards into the 10-bit mantissa (and
+    // potentially the exponent).
+    //
+    // The only time where we may NOT do this is when a "tie" occurs - i.e. when the
+    // rounding bit is set but all of the lower bits are 0. In that case, we don't add 1
+    // if the retained mantissa is "even" (its least significant bit is 0).
+    if ((rounded_mant & 0x3fffu) != 0x1000u || (f_mant & 0x7ffu) != 0) {
+      rounded_mant += 0x1000u;
+    }
+
+    const uint16_t h_mant = uint16_t(rounded_mant >> 13);
+    return h_sign + h_mant;
+  }
+
+  const uint16_t h_exp = uint16_t(h_biased_exp) << 10;
+
+  // See comment on rounding behavior above
+  uint32_t rounded_mant = f_mant;
+  if ((rounded_mant & 0x3fffu) != 0x1000u) {
+    rounded_mant += 0x1000u;
+  }
+
+  const uint16_t h_mant = uint16_t(rounded_mant >> 13);
+  // Note that we ADD (rather than OR) the components because we want the carryover bit
+  // from rounding the mantissa to cascade through the exponent (it shouldn't affect the
+  // sign bit though).
+  return h_sign + h_exp + h_mant;
+}
+
+// Converts a IEEE binary16 into a binary32
+uint32_t Binary16BitsToBinary32Bits(uint16_t h_bits) {
+  // Sign mask for output binary32
+  const uint32_t f_sign = uint32_t(h_bits & 0x8000u) << 16;
+
+  // Exponent mask for input binary16
+  const uint16_t h_exp = h_bits & 0x7c00;
+  // Mantissa mask for input binary16
+  const uint16_t h_mant = h_bits & 0x3ffu;
+
+  switch (h_exp) {
+    // Handle Inf and NaN
+    case 0x7c00u:
+      return f_sign | 0x7f800000u | (uint32_t(h_mant) << 13);
+    // Handle zeros and subnormals
+    case 0x0000u: {
+      // Input is +/-0
+      if (h_mant == 0) {
+        return f_sign;
+      }
+      // Subnormal binary16 to normal binary32
+      //
+      // Start with an f32-biased exponent of 2^-15. We then decrement it until the most
+      // significant set bit is left-shifted out - as it doesn't get explicitly stored in
+      // normalized floating point values. Instead, its existence is implied by the new
+      // exponent.
+      uint32_t f_exp = 127 - 15;
+      uint32_t f_mant = uint32_t(h_mant) << 1;
+      while ((f_mant & 0x0400u) == 0) {
+        --f_exp;
+        f_mant <<= 1;
+      }
+      f_exp <<= 23;
+      f_mant = (f_mant & 0x03ffu) << 13;
+      return f_sign | f_exp | f_mant;
+    } break;
+    // Handle normals
+    default:
+      // Equivalent to adding (127 - 15) to the exponent and shifting everything by 13.
+      return f_sign | ((uint32_t(h_bits & 0x7fffu) + 0x1c000u) << 13);
+  }
+}
+
+}  // namespace
+
+float Float16Base::ToFloat() const {
+  const uint32_t f_bits = Binary16BitsToBinary32Bits(value_);
+  return SafeCopy<float>(f_bits);
+}
+
+Float16 Float16::FromFloat(float f) {
+  const uint32_t f_bits = SafeCopy<uint32_t>(f);
+  return Float16{Binary32BitsToBinary16Bits(f_bits)};
+}
+
+std::ostream& operator<<(std::ostream& os, Float16Base arg) {
+  return (os << arg.ToFloat());
+}
 
 }  // namespace util
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index 74308a09a3cfd..4d431291c1422 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -40,7 +40,7 @@ namespace util {
 ///
 /// NOTE: Methods in the class should not mutate the unerlying value or produce copies.
 /// Such functionality is delegated to subclasses.
-class Float16Base {
+class ARROW_EXPORT Float16Base {
  public:
   Float16Base() = default;
   constexpr explicit Float16Base(uint16_t value) : value_(value) {}
@@ -98,6 +98,8 @@ class Float16Base {
 #endif
   }
 
+  float ToFloat() const;
+
   friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) {
     if (lhs.is_nan() || rhs.is_nan()) return false;
     return Float16Base::CompareEq(lhs, rhs);
@@ -149,13 +151,15 @@ class Float16Base {
 };
 
 /// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t`
-class Float16 : public Float16Base {
+class ARROW_EXPORT Float16 : public Float16Base {
  public:
   using Float16Base::Float16Base;
 
   constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); }
   constexpr Float16 operator+() const { return Float16(value_); }
 
+  static Float16 FromFloat(float f);
+
   /// \brief Read a `Float16` from memory in native-endian byte order
   static Float16 FromBytes(const uint8_t* src) {
     return Float16(SafeLoadAs<uint16_t>(src));
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index 1ccb9db7b0e25..2a7ebd1c5b435 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <array>
+#include <cmath>
 #include <utility>
 #include <vector>
 
@@ -33,6 +34,96 @@ namespace {
 template <typename T>
 using Limits = std::numeric_limits<T>;
 
+float F32(uint32_t bits) { return SafeCopy<float>(bits); }
+
+TEST(Float16Test, RoundTripFromFloat32) {
+  struct TestCase {
+    float f32;
+    uint16_t b16;
+    float f16_as_f32;
+  };
+  // Expected values were also manually validated with numpy-1.24.3
+  const TestCase test_cases[] = {
+      // +/-0.0f
+      {F32(0x80000000u), 0b1000000000000000u, -0.0f},
+      {F32(0x00000000u), 0b0000000000000000u, +0.0f},
+      // 32-bit exp is 102 => 2^-25. Rounding to nearest.
+      {F32(0xb3000001u), 0b1000000000000001u, -5.96046447754e-8f},
+      // 32-bit exp is 102 => 2^-25. Rounding to even.
+      {F32(0xb3000000u), 0b1000000000000000u, -0.0f},
+      // 32-bit exp is 101 => 2^-26. Underflow to zero.
+      {F32(0xb2800001u), 0b1000000000000000u, -0.0f},
+      // 32-bit exp is 108 => 2^-19.
+      {F32(0xb61a0000u), 0b1000000000100110u, -2.26497650146e-6f},
+      // 32-bit exp is 108 => 2^-19.
+      {F32(0xb61e0000u), 0b1000000000101000u, -2.38418579102e-6f},
+      // 32-bit exp is 112 => 2^-15. Rounding to nearest.
+      {F32(0xb87fa001u), 0b1000001111111111u, -6.09755516052e-5f},
+      // 32-bit exp is 112 => 2^-15. Rounds to 16-bit exp of 1 => 2^-14
+      {F32(0xb87fe001u), 0b1000010000000000u, -6.103515625e-5f},
+      // 32-bit exp is 142 => 2^15. Rounding to nearest.
+      {F32(0xc7001001u), 0b1111100000000001u, -32800.0f},
+      // 32-bit exp is 142 => 2^15. Rounding to even.
+      {F32(0xc7001000u), 0b1111100000000000u, -32768.0f},
+      // 65520.0f rounds to inf
+      {F32(0x477ff000u), 0b0111110000000000u, Limits<float>::infinity()},
+      // 65488.0039062f rounds to 65504.0 (float16 max)
+      {F32(0x477fd001u), 0b0111101111111111u, 65504.0f},
+      // 32-bit exp is 127 => 2^0, rounds to 16-bit exp of 16 => 2^1.
+      {F32(0xbffff000u), 0b1100000000000000u, -2.0f},
+  };
+
+  for (size_t index = 0; index < std::size(test_cases); ++index) {
+    ARROW_SCOPED_TRACE("index=", index);
+    const auto& tc = test_cases[index];
+    const auto f16 = Float16::FromFloat(tc.f32);
+    EXPECT_EQ(tc.b16, f16.bits());
+    EXPECT_EQ(tc.f16_as_f32, f16.ToFloat());
+  }
+}
+
+TEST(Float16Test, RoundTripFromFloat32Nan) {
+  const float nan_test_cases[] = {
+      Limits<float>::quiet_NaN(), F32(0x7f800001u), F32(0xff800001u), F32(0x7fc00000u),
+      F32(0xffc00000u),           F32(0x7fffffffu), F32(0xffffffffu)};
+  // Inputs pair up as +/- sign for three payloads: lowest bit, top bit, all bits.
+  for (size_t i = 0; i < std::size(nan_test_cases); ++i) {
+    ARROW_SCOPED_TRACE("i=", i);
+    const auto f32 = nan_test_cases[i];
+
+    ASSERT_TRUE(std::isnan(f32));
+    const bool sign = std::signbit(f32);
+    // Narrowing to 16 bits must keep both the NaN-ness and the sign bit.
+    const auto f16 = Float16::FromFloat(f32);
+    EXPECT_TRUE(f16.is_nan());
+    EXPECT_EQ(sign, f16.signbit());
+    // Widening back to 32 bits must keep them as well.
+    const auto f16_as_f32 = f16.ToFloat();
+    EXPECT_TRUE(std::isnan(f16_as_f32));
+    EXPECT_EQ(sign, std::signbit(f16_as_f32));
+  }
+}
+
+TEST(Float16Test, RoundTripFromFloat32Inf) {
+  const float test_cases[] = {+Limits<float>::infinity(), -Limits<float>::infinity()};
+
+  for (size_t i = 0; i < std::size(test_cases); ++i) {
+    ARROW_SCOPED_TRACE("i=", i);
+    const auto f32 = test_cases[i];
+
+    ASSERT_TRUE(std::isinf(f32));
+    const bool sign = std::signbit(f32);
+
+    const auto f16 = Float16::FromFloat(f32);
+    EXPECT_TRUE(f16.is_infinity());
+    EXPECT_EQ(sign, f16.signbit());
+
+    const auto f16_as_f32 = f16.ToFloat();
+    EXPECT_TRUE(std::isinf(f16_as_f32));
+    EXPECT_EQ(sign, std::signbit(f16_as_f32));
+  }
+}
+
 // Holds a float16 and its equivalent float32
 struct TestValue {
   TestValue(Float16 f16, float f32) : f16(f16), f32(f32) {}

From d41a0c579d7b4deacb210805fb2b3fe0e2d84e7e Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Mon, 21 Aug 2023 19:53:40 -0400
Subject: [PATCH 16/37] Remove `Float16Base` class

---
 cpp/src/arrow/util/float16.cc      |   6 +-
 cpp/src/arrow/util/float16.h       | 100 +++++++++++++----------------
 cpp/src/parquet/statistics_test.cc |  22 +++----
 3 files changed, 55 insertions(+), 73 deletions(-)

diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
index 47e90f1ba050a..560a5e270396d 100644
--- a/cpp/src/arrow/util/float16.cc
+++ b/cpp/src/arrow/util/float16.cc
@@ -171,7 +171,7 @@ uint32_t Binary16BitsToBinary32Bits(uint16_t h_bits) {
 
 }  // namespace
 
-float Float16Base::ToFloat() const {
+float Float16::ToFloat() const {
   const uint32_t f_bits = Binary16BitsToBinary32Bits(value_);
   return SafeCopy<float>(f_bits);
 }
@@ -181,9 +181,7 @@ Float16 Float16::FromFloat(float f) {
   return Float16{Binary32BitsToBinary16Bits(f_bits)};
 }
 
-std::ostream& operator<<(std::ostream& os, Float16Base arg) {
-  return (os << arg.ToFloat());
-}
+std::ostream& operator<<(std::ostream& os, Float16 arg) { return (os << arg.ToFloat()); }
 
 }  // namespace util
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index 4d431291c1422..d36d164f8fbad 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -31,19 +31,35 @@
 namespace arrow {
 namespace util {
 
-/// \brief Base class for an IEEE half-precision float, encoded as a `uint16_t`
+/// \brief Class representing an IEEE half-precision float, encoded as a `uint16_t`
 ///
-/// The exact format is as follows (from MSB to LSB):
-/// - bit 0:     sign
-/// - bits 1-5:  exponent
-/// - bits 6-15: mantissa
+/// The exact format is as follows (from LSB to MSB):
+/// - bits 0-9:   mantissa
+/// - bits 10-14: exponent
+/// - bit 15:     sign
 ///
-/// NOTE: Methods in the class should not mutate the unerlying value or produce copies.
-/// Such functionality is delegated to subclasses.
-class ARROW_EXPORT Float16Base {
+class ARROW_EXPORT Float16 {
  public:
-  Float16Base() = default;
-  constexpr explicit Float16Base(uint16_t value) : value_(value) {}
+  Float16() = default;
+  constexpr explicit Float16(uint16_t value) : value_(value) {}
+
+  /// \brief Create a `Float16` from a 32-bit float (may lose precision)
+  static Float16 FromFloat(float f);
+
+  /// \brief Read a `Float16` from memory in native-endian byte order
+  static Float16 FromBytes(const uint8_t* src) {
+    return Float16(SafeLoadAs<uint16_t>(src));
+  }
+
+  /// \brief Read a `Float16` from memory in little-endian byte order
+  static Float16 FromLittleEndian(const uint8_t* src) {
+    return Float16(bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
+  }
+
+  /// \brief Read a `Float16` from memory in big-endian byte order
+  static Float16 FromBigEndian(const uint8_t* src) {
+    return Float16(bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
+  }
 
   /// \brief Return the value's integer representation
   constexpr uint16_t bits() const { return value_; }
@@ -61,6 +77,9 @@ class ARROW_EXPORT Float16Base {
   /// \brief Return true if the value is positive/negative zero
   constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; }
 
+  /// \brief Convert to a 32-bit float
+  float ToFloat() const;
+
   /// \brief Copy the value's bytes in native-endian byte order
   void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); }
   /// \brief Return the value's bytes in native-endian byte order
@@ -74,7 +93,7 @@ class ARROW_EXPORT Float16Base {
 
   /// \brief Copy the value's bytes in little-endian byte order
   void ToLittleEndian(uint8_t* dest) const {
-    Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(dest);
+    Float16{bit_util::ToLittleEndian(value_)}.ToBytes(dest);
   }
   /// \brief Return the value's bytes in little-endian byte order
   constexpr std::array<uint8_t, 2> ToLittleEndian() const {
@@ -87,7 +106,7 @@ class ARROW_EXPORT Float16Base {
 
   /// \brief Copy the value's bytes in big-endian byte order
   void ToBigEndian(uint8_t* dest) const {
-    Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(dest);
+    Float16{bit_util::ToBigEndian(value_)}.ToBytes(dest);
   }
   /// \brief Return the value's bytes in big-endian byte order
   constexpr std::array<uint8_t, 2> ToBigEndian() const {
@@ -98,41 +117,38 @@ class ARROW_EXPORT Float16Base {
 #endif
   }
 
-  float ToFloat() const;
+  constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); }
+  constexpr Float16 operator+() const { return Float16(value_); }
 
-  friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) {
+  friend constexpr bool operator==(Float16 lhs, Float16 rhs) {
     if (lhs.is_nan() || rhs.is_nan()) return false;
-    return Float16Base::CompareEq(lhs, rhs);
-  }
-  friend constexpr bool operator!=(Float16Base lhs, Float16Base rhs) {
-    return !(lhs == rhs);
+    return Float16::CompareEq(lhs, rhs);
   }
+  friend constexpr bool operator!=(Float16 lhs, Float16 rhs) { return !(lhs == rhs); }
 
-  friend constexpr bool operator<(Float16Base lhs, Float16Base rhs) {
+  friend constexpr bool operator<(Float16 lhs, Float16 rhs) {
     if (lhs.is_nan() || rhs.is_nan()) return false;
-    return Float16Base::CompareLt(lhs, rhs);
+    return Float16::CompareLt(lhs, rhs);
   }
-  friend constexpr bool operator>(Float16Base lhs, Float16Base rhs) { return rhs < lhs; }
+  friend constexpr bool operator>(Float16 lhs, Float16 rhs) { return rhs < lhs; }
 
-  friend constexpr bool operator<=(Float16Base lhs, Float16Base rhs) {
+  friend constexpr bool operator<=(Float16 lhs, Float16 rhs) {
     if (lhs.is_nan() || rhs.is_nan()) return false;
-    return !Float16Base::CompareLt(rhs, lhs);
-  }
-  friend constexpr bool operator>=(Float16Base lhs, Float16Base rhs) {
-    return rhs <= lhs;
+    return !Float16::CompareLt(rhs, lhs);
   }
+  friend constexpr bool operator>=(Float16 lhs, Float16 rhs) { return rhs <= lhs; }
 
-  ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16Base arg);
+  ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg);
 
  protected:
   uint16_t value_;
 
  private:
   // Comparison helpers that assume neither operand is NaN
-  static constexpr bool CompareEq(Float16Base lhs, Float16Base rhs) {
+  static constexpr bool CompareEq(Float16 lhs, Float16 rhs) {
     return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero());
   }
-  static constexpr bool CompareLt(Float16Base lhs, Float16Base rhs) {
+  static constexpr bool CompareLt(Float16 lhs, Float16 rhs) {
     if (lhs.signbit()) {
       if (rhs.signbit()) {
         // Both are negative
@@ -150,32 +166,6 @@ class ARROW_EXPORT Float16Base {
   }
 };
 
-/// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t`
-class ARROW_EXPORT Float16 : public Float16Base {
- public:
-  using Float16Base::Float16Base;
-
-  constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); }
-  constexpr Float16 operator+() const { return Float16(value_); }
-
-  static Float16 FromFloat(float f);
-
-  /// \brief Read a `Float16` from memory in native-endian byte order
-  static Float16 FromBytes(const uint8_t* src) {
-    return Float16(SafeLoadAs<uint16_t>(src));
-  }
-
-  /// \brief Read a `Float16` from memory in little-endian byte order
-  static Float16 FromLittleEndian(const uint8_t* src) {
-    return Float16(bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
-  }
-
-  /// \brief Read a `Float16` from memory in big-endian byte order
-  static Float16 FromBigEndian(const uint8_t* src) {
-    return Float16(bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
-  }
-};
-
 static_assert(std::is_trivial_v<Float16>);
 
 }  // namespace util
diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index 7de4e3f3840bf..789c42f379799 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -63,22 +63,16 @@ using schema::PrimitiveNode;
 
 namespace test {
 
-class BufferedFloat16 : public ::arrow::util::Float16Base {
- public:
-  explicit BufferedFloat16(Float16 f16) : Float16Base(f16) {
-    buffer_ = *::arrow::AllocateBuffer(sizeof(value_));
-    ToLittleEndian(buffer_->mutable_data());
+struct BufferedFloat16 {
+  explicit BufferedFloat16(Float16 f16)
+      : f16(f16), buffer(*::arrow::AllocateBuffer(sizeof(uint16_t))) {
+    this->f16.ToLittleEndian(buffer->mutable_data());
   }
-  explicit BufferedFloat16(uint16_t value) : BufferedFloat16(Float16(value)) {}
-
-  const uint8_t* bytes() const { return buffer_->data(); }
-  const std::shared_ptr<::arrow::Buffer>& buffer() { return buffer_; }
-
-  BufferedFloat16 operator+() const { return *this; }
-  BufferedFloat16 operator-() const { return BufferedFloat16(value_ ^ 0x8000); }
+  explicit BufferedFloat16(uint16_t bits) : BufferedFloat16(Float16(bits)) {}
+  const uint8_t* bytes() const { return buffer->data(); }
 
- private:
-  std::shared_ptr<::arrow::Buffer> buffer_;
+  Float16 f16;
+  std::shared_ptr<::arrow::Buffer> buffer;
 };
 
 // ----------------------------------------------------------------------

From aaef4b4c76260dbf0d386f4d89332be4e40308e5 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Tue, 22 Aug 2023 18:23:12 -0400
Subject: [PATCH 17/37] Update/restructure comparison tests

---
 cpp/src/arrow/util/float16_test.cc | 135 +++++++++++++----------------
 1 file changed, 61 insertions(+), 74 deletions(-)

diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index 2a7ebd1c5b435..e13cecc0f9046 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -124,92 +124,79 @@ TEST(Float16Test, RoundTripFromFloat32Inf) {
   }
 }
 
-// Holds a float16 and its equivalent float32
-struct TestValue {
-  TestValue(Float16 f16, float f32) : f16(f16), f32(f32) {}
-  TestValue(uint16_t u16, float f32) : TestValue(Float16(u16), f32) {}
+TEST(Float16Test, Compare) {
+  constexpr float f32_inf = Limits<float>::infinity();
+  constexpr float f32_nan = Limits<float>::quiet_NaN();
 
-  Float16 f16;
-  float f32;
-};
-
-#define GENERATE_OPERATOR(NAME, OP)                              \
-  struct NAME {                                                  \
-    std::pair<bool, bool> operator()(TestValue l, TestValue r) { \
-      return std::make_pair((l.f32 OP r.f32), (l.f16 OP r.f16)); \
-    }                                                            \
-  }
-
-GENERATE_OPERATOR(CompareEq, ==);
-GENERATE_OPERATOR(CompareNe, !=);
-GENERATE_OPERATOR(CompareLt, <);
-GENERATE_OPERATOR(CompareGt, >);
-GENERATE_OPERATOR(CompareLe, <=);
-GENERATE_OPERATOR(CompareGe, >=);
-
-#undef GENERATE_OPERATOR
-
-const std::vector<TestValue> g_test_values = {
-    TestValue(Limits<Float16>::min(), +0.00006104f),
-    TestValue(Limits<Float16>::max(), +65504.0f),
-    TestValue(Limits<Float16>::lowest(), -65504.0f),
-    TestValue(+Limits<Float16>::infinity(), +Limits<float>::infinity()),
-    TestValue(-Limits<Float16>::infinity(), -Limits<float>::infinity()),
-    // Multiple (semantically equivalent) NaN representations
-    TestValue(0x7fff, Limits<float>::quiet_NaN()),
-    TestValue(0xffff, Limits<float>::quiet_NaN()),
-    TestValue(0x7e00, Limits<float>::quiet_NaN()),
-    TestValue(0xfe00, Limits<float>::quiet_NaN()),
-    // Positive/negative zeroes
-    TestValue(0x0000, +0.0f),
-    TestValue(0x8000, -0.0f),
-    // Miscellaneous values. In general, they're chosen to test the sign/exponent and
-    // exponent/mantissa boundaries
-    TestValue(0x101c, +0.000502f),
-    TestValue(0x901c, -0.000502f),
-    TestValue(0x101d, +0.0005022f),
-    TestValue(0x901d, -0.0005022f),
-    TestValue(0x121c, +0.000746f),
-    TestValue(0x921c, -0.000746f),
-    TestValue(0x141c, +0.001004f),
-    TestValue(0x941c, -0.001004f),
-    TestValue(0x501c, +32.9f),
-    TestValue(0xd01c, -32.9f),
-    // A few subnormals for good measure
-    TestValue(0x001c, +0.0000017f),
-    TestValue(0x801c, -0.0000017f),
-    TestValue(0x021c, +0.0000332f),
-    TestValue(0x821c, -0.0000332f),
-};
+  const struct {
+    Float16 f16;
+    float f32;
+  } test_values[] = {
+      {Limits<Float16>::min(), +6.103515625e-05f},
+      {Limits<Float16>::max(), +65504.0f},
+      {Limits<Float16>::lowest(), -65504.0f},
+      {+Limits<Float16>::infinity(), +f32_inf},
+      {-Limits<Float16>::infinity(), -f32_inf},
+      // Multiple (semantically equivalent) NaN representations
+      {Float16(0x7e00), f32_nan},
+      {Float16(0xfe00), f32_nan},
+      {Float16(0x7fff), f32_nan},
+      {Float16(0xffff), f32_nan},
+      // Positive/negative zeros
+      {Float16(0x0000), +0.0f},
+      {Float16(0x8000), -0.0f},
+      // Miscellaneous values. In general, they're chosen to test the sign/exponent and
+      // exponent/mantissa boundaries
+      {Float16(0x101c), +0.00050163269043f},
+      {Float16(0x901c), -0.00050163269043f},
+      {Float16(0x101d), +0.000502109527588f},
+      {Float16(0x901d), -0.000502109527588f},
+      {Float16(0x121c), +0.00074577331543f},
+      {Float16(0x921c), -0.00074577331543f},
+      {Float16(0x141c), +0.00100326538086f},
+      {Float16(0x941c), -0.00100326538086f},
+      {Float16(0x501c), +32.875f},
+      {Float16(0xd01c), -32.875f},
+      // A few subnormals for good measure
+      {Float16(0x001c), +1.66893005371e-06f},
+      {Float16(0x801c), -1.66893005371e-06f},
+      {Float16(0x021c), +3.21865081787e-05f},
+      {Float16(0x821c), -3.21865081787e-05f},
+  };
 
-template <typename Operator>
-class Float16OperatorTest : public ::testing::Test {
- public:
-  void TestCompare(const std::vector<TestValue>& test_values) {
-    const auto num_values = static_cast<int>(test_values.size());
+  auto expect_op = [&](std::string op_name, auto op) {
+    ARROW_SCOPED_TRACE(op_name);
+    const auto num_values = static_cast<int>(std::size(test_values));
 
     // Check all combinations of operands in both directions
     for (int i = 0; i < num_values; ++i) {
       for (int j = 0; j < num_values; ++j) {
-        ARROW_SCOPED_TRACE(i, ",", j);
-
-        auto a = test_values[i];
-        auto b = test_values[j];
+        auto [a16, a32] = test_values[i];
+        auto [b16, b32] = test_values[j];
+        ARROW_SCOPED_TRACE("[", i, ",", j, "] = ", a16, ",", b16);
 
         // Results for float16 and float32 should be the same
-        auto ret = Operator{}(a, b);
-        ASSERT_EQ(ret.first, ret.second);
+        ASSERT_EQ(op(a16, b16), op(a32, b32));
       }
     }
-  }
-};
-
-using OperatorTypes =
-    ::testing::Types<CompareEq, CompareNe, CompareLt, CompareGt, CompareLe, CompareGe>;
+  };
 
-TYPED_TEST_SUITE(Float16OperatorTest, OperatorTypes);
+  // Verify that our "equivalent" 16/32-bit values actually are
+  for (const auto& v : test_values) {
+    if (std::isnan(v.f32)) {
+      ASSERT_TRUE(std::isnan(v.f16.ToFloat()));
+    } else {
+      ASSERT_EQ(v.f32, v.f16.ToFloat());
+    }
+  }
 
-TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); }
+  expect_op("equal", [](auto l, auto r) { return l == r; });
+  expect_op("not_equal", [](auto l, auto r) { return l != r; });
+  expect_op("less", [](auto l, auto r) { return l < r; });
+  expect_op("greater", [](auto l, auto r) { return l > r; });
+  expect_op("less_equal", [](auto l, auto r) { return l <= r; });
+  expect_op("greater_equal", [](auto l, auto r) { return l >= r; });
+}
 
 TEST(Float16Test, ToBytes) {
   constexpr auto f16 = Float16(0xd01c);

From 9e5cf1444631169e758434154d85b9a2cbda690a Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Fri, 25 Aug 2023 20:30:49 -0400
Subject: [PATCH 18/37] Fix comment

---
 cpp/src/arrow/util/float16.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
index 560a5e270396d..5bdcdfab7ec49 100644
--- a/cpp/src/arrow/util/float16.cc
+++ b/cpp/src/arrow/util/float16.cc
@@ -34,7 +34,7 @@ namespace {
 // |-----------------------------------------|
 // | precision | half    | single  | double  |
 // |-----------------------------------------|
-// | mantissa  | 10 bits | 23 bits | 53 bits |
+// | mantissa  | 10 bits | 23 bits | 52 bits |
 // | exponent  | 5 bits  | 8 bits  | 11 bits |
 // | sign      | 1 bit   | 1 bit   | 1 bit   |
 // | exp bias  | 15      | 127     | 1023    |

From e12498654a502fc71c448b7468cd010a98b7b9c2 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Fri, 25 Aug 2023 23:43:41 -0400
Subject: [PATCH 19/37] Minor changes to Float16 class/tests

---
 cpp/src/arrow/util/float16.h       | 12 ++++++++----
 cpp/src/arrow/util/float16_test.cc |  5 +++++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index d36d164f8fbad..7c8597a8ec542 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -53,18 +53,20 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Read a `Float16` from memory in little-endian byte order
   static Float16 FromLittleEndian(const uint8_t* src) {
-    return Float16(bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
+    return Float16(::arrow::bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
   }
 
   /// \brief Read a `Float16` from memory in big-endian byte order
   static Float16 FromBigEndian(const uint8_t* src) {
-    return Float16(bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
+    return Float16(::arrow::bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
   }
 
   /// \brief Return the value's integer representation
   constexpr uint16_t bits() const { return value_; }
   constexpr explicit operator uint16_t() const { return bits(); }
 
+  explicit operator float() const { return ToFloat(); }
+
   /// \brief Return true if the value is negative (sign bit is set)
   constexpr bool signbit() const { return (value_ & 0x8000) != 0; }
 
@@ -74,6 +76,8 @@ class ARROW_EXPORT Float16 {
   }
   /// \brief Return true if the value is positive/negative infinity
   constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; }
+  /// \brief Return true if the value is finite and not NaN
+  constexpr bool is_finite() const { return (value_ & 0x7c00) != 0x7c00; }
   /// \brief Return true if the value is positive/negative zero
   constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; }
 
@@ -93,7 +97,7 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Copy the value's bytes in little-endian byte order
   void ToLittleEndian(uint8_t* dest) const {
-    Float16{bit_util::ToLittleEndian(value_)}.ToBytes(dest);
+    Float16{::arrow::bit_util::ToLittleEndian(value_)}.ToBytes(dest);
   }
   /// \brief Return the value's bytes in little-endian byte order
   constexpr std::array<uint8_t, 2> ToLittleEndian() const {
@@ -106,7 +110,7 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Copy the value's bytes in big-endian byte order
   void ToBigEndian(uint8_t* dest) const {
-    Float16{bit_util::ToBigEndian(value_)}.ToBytes(dest);
+    Float16{::arrow::bit_util::ToBigEndian(value_)}.ToBytes(dest);
   }
   /// \brief Return the value's bytes in big-endian byte order
   constexpr std::array<uint8_t, 2> ToBigEndian() const {
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index e13cecc0f9046..d69bf6954ce77 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -79,6 +79,11 @@ TEST(Float16Test, RoundTripFromFloat32) {
     const auto f16 = Float16::FromFloat(tc.f32);
     EXPECT_EQ(tc.b16, f16.bits());
     EXPECT_EQ(tc.f16_as_f32, f16.ToFloat());
+
+    EXPECT_EQ(std::signbit(tc.f16_as_f32), f16.signbit());
+    EXPECT_EQ(std::isnan(tc.f16_as_f32), f16.is_nan());
+    EXPECT_EQ(std::isinf(tc.f16_as_f32), f16.is_infinity());
+    EXPECT_EQ(std::isfinite(tc.f16_as_f32), f16.is_finite());
   }
 }
 

From a12176ff614929584aa0b3e98706ef6d45ed5f4f Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 31 Aug 2023 15:34:17 -0400
Subject: [PATCH 20/37] Update statistics and tests

---
 cpp/src/parquet/statistics.cc      | 15 ++++----
 cpp/src/parquet/statistics_test.cc | 59 ++++++++++++------------------
 2 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index a7691dd568796..73caf2b46f555 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -558,7 +558,8 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
       : descr_(descr),
         pool_(pool),
         min_buffer_(AllocateBuffer(pool_, 0)),
-        max_buffer_(AllocateBuffer(pool_, 0)) {
+        max_buffer_(AllocateBuffer(pool_, 0)),
+        logical_type_(LogicalTypeId(descr_)) {
     comparator_ = MakeComparator<DType>(descr);
     TypedStatisticsImpl::Reset();
   }
@@ -628,13 +629,12 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
   bool Equals(const Statistics& raw_other) const override {
     if (physical_type() != raw_other.physical_type()) return false;
 
-    const auto logical_id = LogicalTypeId(*this);
-    const auto other_logical_id = LogicalTypeId(raw_other);
+    const auto other_logical_type = LogicalTypeId(raw_other);
     // Only compare against logical types that influence the interpretation of the
     // physical type
-    if (IsMeaningfulLogicalType(logical_id)) {
-      if (logical_id != other_logical_id) return false;
-    } else if (IsMeaningfulLogicalType(other_logical_id)) {
+    if (IsMeaningfulLogicalType(logical_type_)) {
+      if (logical_type_ != other_logical_type) return false;
+    } else if (IsMeaningfulLogicalType(other_logical_type)) {
       return false;
     }
 
@@ -763,6 +763,7 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
   EncodedStatistics statistics_;
   std::shared_ptr<TypedComparator<DType>> comparator_;
   std::shared_ptr<ResizableBuffer> min_buffer_, max_buffer_;
+  LogicalType::Type::type logical_type_ = LogicalType::Type::NONE;
 
   void PlainEncode(const T& src, std::string* dst) const;
   void PlainDecode(const std::string& src, T* dst) const;
@@ -794,7 +795,7 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
 
   void SetMinMaxPair(std::pair<T, T> min_max) {
     // CleanStatistic can return a nullopt in case of erroneous values, e.g. NaN
-    auto maybe_min_max = CleanStatistic(min_max, LogicalTypeId(*this));
+    auto maybe_min_max = CleanStatistic(min_max, logical_type_);
     if (!maybe_min_max) return;
 
     auto min = maybe_min_max.value().first;
diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index 789c42f379799..cb2e6455abfa9 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -63,18 +63,6 @@ using schema::PrimitiveNode;
 
 namespace test {
 
-struct BufferedFloat16 {
-  explicit BufferedFloat16(Float16 f16)
-      : f16(f16), buffer(*::arrow::AllocateBuffer(sizeof(uint16_t))) {
-    this->f16.ToLittleEndian(buffer->mutable_data());
-  }
-  explicit BufferedFloat16(uint16_t bits) : BufferedFloat16(Float16(bits)) {}
-  const uint8_t* bytes() const { return buffer->data(); }
-
-  Float16 f16;
-  std::shared_ptr<::arrow::Buffer> buffer;
-};
-
 // ----------------------------------------------------------------------
 // Test comparators
 
@@ -1142,30 +1130,24 @@ void TestStatisticsSortOrder<Float16LogicalType>::SetValues() {
   constexpr int kValueLen = 2;
   constexpr int kNumBytes = NUM_VALUES * kValueLen;
 
-  const uint16_t u16_vals[NUM_VALUES] = {
-      0b1100010100000000,  // -5.0
-      0b1100010000000000,  // -4.0
-      0b1100001000000000,  // -3.0
-      0b1100000000000000,  // -2.0
-      0b1011110000000000,  // -1.0
-      0b0000000000000000,  // +0.0
-      0b0011110000000000,  // +1.0
-      0b0100000000000000,  // +2.0
-      0b0100001000000000,  // +3.0
-      0b0100010000000000,  // +4.0
+  const Float16 f16_vals[NUM_VALUES] = {
+      Float16::FromFloat(+2.0f), Float16::FromFloat(-4.0f), Float16::FromFloat(+4.0f),
+      Float16::FromFloat(-2.0f), Float16::FromFloat(-1.0f), Float16::FromFloat(+3.0f),
+      Float16::FromFloat(+1.0f), Float16::FromFloat(-5.0f), Float16::FromFloat(+0.0f),
+      Float16::FromFloat(-3.0f),
   };
 
   values_buf_.resize(kNumBytes);
   uint8_t* ptr = values_buf_.data();
   for (int i = 0; i < NUM_VALUES; ++i) {
-    Float16(u16_vals[i]).ToLittleEndian(ptr);
+    f16_vals[i].ToLittleEndian(ptr);
     values_[i].ptr = ptr;
     ptr += kValueLen;
   }
 
   stats_[0]
-      .set_min(std::string(reinterpret_cast<const char*>(values_[0].ptr), kValueLen))
-      .set_max(std::string(reinterpret_cast<const char*>(values_[9].ptr), kValueLen));
+      .set_min(std::string(reinterpret_cast<const char*>(values_[7].ptr), kValueLen))
+      .set_max(std::string(reinterpret_cast<const char*>(values_[2].ptr), kValueLen));
 }
 
 TYPED_TEST_SUITE(TestStatisticsSortOrder, CompareTestTypes);
@@ -1503,6 +1485,17 @@ void TestFloatStatistics<T>::TestNaNs() {
                   valid_bitmap_no_nans);
 }
 
+struct BufferedFloat16 {
+  explicit BufferedFloat16(Float16 f16) : f16(f16) {
+    this->f16.ToLittleEndian(bytes_.data());
+  }
+  explicit BufferedFloat16(float f) : BufferedFloat16(Float16::FromFloat(f)) {}
+  const uint8_t* bytes() const { return bytes_.data(); }
+
+  Float16 f16;
+  std::array<uint8_t, 2> bytes_;
+};
+
 template <>
 void TestFloatStatistics<Float16LogicalType>::TestNaNs() {
   constexpr int kNumValues = 8;
@@ -1512,22 +1505,18 @@ void TestFloatStatistics<Float16LogicalType>::TestNaNs() {
 
   using F16 = BufferedFloat16;
   const auto nan_f16 = F16(std::numeric_limits<Float16>::quiet_NaN());
-  const auto min_f16 = F16(0xc400);  // -4.0
-  const auto max_f16 = F16(0x4200);  // +3.0
+  const auto min_f16 = F16(-4.0f);
+  const auto max_f16 = F16(+3.0f);
 
   const auto min = FLBA{min_f16.bytes()};
   const auto max = FLBA{max_f16.bytes()};
 
   std::array<F16, kNumValues> all_nans_f16 = {nan_f16, nan_f16, nan_f16, nan_f16,
                                               nan_f16, nan_f16, nan_f16, nan_f16};
-  std::array<F16, kNumValues> some_nans_f16 = {nan_f16,     max_f16,
-                                               F16(0xc200),  // -3.0
-                                               F16(0xbc00),  // -1.0
-                                               nan_f16,
-                                               F16(0x4000),  // +2.0
-                                               min_f16,     nan_f16};
+  std::array<F16, kNumValues> some_nans_f16 = {
+      nan_f16, max_f16, F16(-3.0f), F16(-1.0f), nan_f16, F16(+2.0f), min_f16, nan_f16};
   std::array<F16, kNumValues> other_nans_f16 = some_nans_f16;
-  other_nans_f16[0] = F16(0x3e00);  // +1.5
+  other_nans_f16[0] = F16(+1.5f);  // +1.5
 
   auto prepare_values = [](const auto& values) -> std::vector<FLBA> {
     std::vector<FLBA> out(values.size());

From 6496aefca6d979d1b9a636608d885097570a47e4 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 31 Aug 2023 15:38:40 -0400
Subject: [PATCH 21/37] Update Arrow reader

---
 cpp/src/parquet/arrow/reader_internal.cc | 79 ++++--------------------
 1 file changed, 12 insertions(+), 67 deletions(-)

diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc
index f4e3a89e71a31..e5aef5a45b5f3 100644
--- a/cpp/src/parquet/arrow/reader_internal.cc
+++ b/cpp/src/parquet/arrow/reader_internal.cc
@@ -715,74 +715,14 @@ Status TransferDecimal(RecordReader* reader, MemoryPool* pool,
   return Status::OK();
 }
 
-static inline Status ConvertToHalfFloat(const Array& array,
-                                        const std::shared_ptr<DataType>& type,
-                                        MemoryPool* pool, std::shared_ptr<Array>* out) {
-  constexpr int32_t byte_width = sizeof(uint16_t);
-  DCHECK_EQ(checked_cast<const ::arrow::HalfFloatType&>(*type).byte_width(), byte_width);
-
-  // We read the halffloat (uint16_t) bytes from a raw binary array, in which they're
-  // assumed to be little-endian.
-  const auto& binary_array = checked_cast<const ::arrow::FixedSizeBinaryArray&>(array);
-  DCHECK_EQ(checked_cast<const ::arrow::FixedSizeBinaryType&>(*binary_array.type())
-                .byte_width(),
-            byte_width);
-
-  // Number of elements in the halffloat array
-  const int64_t length = binary_array.length();
-  // Allocate data for the output halffloat array
-  ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * byte_width, pool));
-  uint8_t* out_ptr = data->mutable_data();
-
-  const int64_t null_count = binary_array.null_count();
-  // Copy the values to the output array in native-endian format
-  if (null_count > 0) {
-    for (int64_t i = 0; i < length; ++i, out_ptr += byte_width) {
-      Float16 f16{0};
-      if (binary_array.IsValid(i)) {
-        const uint8_t* in_ptr = binary_array.GetValue(i);
-        f16 = Float16::FromLittleEndian(in_ptr);
-      }
-      f16.ToBytes(out_ptr);
-    }
-  } else {
-#if ARROW_LITTLE_ENDIAN
-    // No need to byte-swap, so do a simple copy
-    std::memcpy(out_ptr, binary_array.raw_values(), length * byte_width);
-#else
-    for (int64_t i = 0; i < length; ++i, out_ptr += byte_width) {
-      const uint8_t* in_ptr = binary_array.GetValue(i);
-      Float16::FromLittleEndian(in_ptr).ToBytes(out_ptr);
-    }
-#endif
-  }
-
-  *out = std::make_shared<::arrow::HalfFloatArray>(
-      type, length, std::move(data), binary_array.null_bitmap(), null_count);
-  return Status::OK();
-}
-
-/// \brief Convert an arrow::BinaryArray to an arrow::HalfFloatArray
-/// We do this by:
-/// 1. Creating an arrow::BinaryArray from the RecordReader's builder
-/// 2. Allocating a buffer for the arrow::HalfFloatArray
-/// 3. Converting the little-endian bytes in each BinaryArray entry to native-endian
-/// halffloat (uint16_t) values
 Status TransferHalfFloat(RecordReader* reader, MemoryPool* pool,
                          const std::shared_ptr<Field>& field, Datum* out) {
-  auto binary_reader = dynamic_cast<BinaryRecordReader*>(reader);
-  DCHECK(binary_reader);
-  ::arrow::ArrayVector chunks = binary_reader->GetBuilderChunks();
-  for (size_t i = 0; i < chunks.size(); ++i) {
-    std::shared_ptr<Array> chunk_as_half;
-    RETURN_NOT_OK(ConvertToHalfFloat(*chunks[i], field->type(), pool, &chunk_as_half));
-    // Replace the chunk, which will hopefully also free memory as we go
-    chunks[i] = chunk_as_half;
-  }
-  if (!field->nullable()) {
-    ReconstructChunksWithoutNulls(&chunks);
-  }
-  *out = std::make_shared<ChunkedArray>(chunks, field->type());
+  static const auto binary_type = ::arrow::fixed_size_binary(2);
+  // Read as a FixedSizeBinaryArray - then, view as a HalfFloatArray
+  std::shared_ptr<ChunkedArray> chunked_array;
+  RETURN_NOT_OK(
+      TransferBinary(reader, pool, field->WithType(binary_type), &chunked_array));
+  ARROW_ASSIGN_OR_RAISE(*out, chunked_array->View(field->type()));
   return Status::OK();
 }
 
@@ -846,10 +786,15 @@ Status TransferColumnData(RecordReader* reader, const std::shared_ptr<Field>& va
       result = chunked_result;
     } break;
     case ::arrow::Type::HALF_FLOAT: {
+      const auto& type = *value_field->type();
       if (descr->physical_type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
-        return Status::Invalid("Physical type for ", value_field->type()->ToString(),
+        return Status::Invalid("Physical type for ", type.ToString(),
                                " must be fixed length binary");
       }
+      if (descr->type_length() != type.byte_width()) {
+        return Status::Invalid("Fixed length binary type for ", type.ToString(),
+                               " must have a byte width of ", type.byte_width());
+      }
       RETURN_NOT_OK(TransferHalfFloat(reader, pool, value_field, &result));
     } break;
     case ::arrow::Type::DECIMAL128: {

From 102dfb4c7fc22aea41e95ab614f65d773f7d7476 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 31 Aug 2023 15:40:13 -0400
Subject: [PATCH 22/37] Remove big-endian handling in column writer

---
 cpp/src/parquet/column_writer.cc | 45 ++------------------------------
 1 file changed, 2 insertions(+), 43 deletions(-)

diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index 715432d005492..a7e7b2f93e174 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -2301,21 +2301,10 @@ struct SerializeFunctor<
 // Write Arrow to Float16
 
 // Requires a custom serializer because Float16s in Parquet are stored as a 2-byte
-// (little-endian) FLBA, whereas in Arrow they're a native `uint16_t`. Also, a temporary
-// buffer is needed if there's an endian mismatch.
+// (little-endian) FLBA, whereas in Arrow they're a native `uint16_t`.
 template <>
 struct SerializeFunctor<::parquet::FLBAType, ::arrow::HalfFloatType> {
-  Status Serialize(const ::arrow::HalfFloatArray& array, ArrowWriteContext* ctx,
-                   FLBA* out) {
-#if ARROW_LITTLE_ENDIAN
-    return SerializeInPlace(array, ctx, out);
-#else
-    return SerializeWithScratch(array, ctx, out);
-#endif
-  }
-
-  Status SerializeInPlace(const ::arrow::HalfFloatArray& array, ArrowWriteContext*,
-                          FLBA* out) {
+  Status Serialize(const ::arrow::HalfFloatArray& array, ArrowWriteContext*, FLBA* out) {
     const uint16_t* values = array.raw_values();
     if (array.null_count() == 0) {
       for (int64_t i = 0; i < array.length(); ++i) {
@@ -2329,40 +2318,10 @@ struct SerializeFunctor<::parquet::FLBAType, ::arrow::HalfFloatType> {
     return Status::OK();
   }
 
-  Status SerializeWithScratch(const ::arrow::HalfFloatArray& array,
-                              ArrowWriteContext* ctx, FLBA* out) {
-    AllocateScratch(array, ctx);
-    if (array.null_count() == 0) {
-      for (int64_t i = 0; i < array.length(); ++i) {
-        out[i] = ToFLBA(array.Value(i));
-      }
-    } else {
-      for (int64_t i = 0; i < array.length(); ++i) {
-        out[i] = array.IsValid(i) ? ToFLBA(array.Value(i)) : FLBA{};
-      }
-    }
-    return Status::OK();
-  }
-
  private:
   FLBA ToFLBA(const uint16_t* value_ptr) const {
     return FLBA{reinterpret_cast<const uint8_t*>(value_ptr)};
   }
-  FLBA ToFLBA(uint16_t value) {
-    auto* out = reinterpret_cast<uint8_t*>(scratch_++);
-    Float16(value).ToLittleEndian(out);
-    return FLBA{out};
-  }
-
-  void AllocateScratch(const ::arrow::HalfFloatArray& array, ArrowWriteContext* ctx) {
-    int64_t non_null_count = array.length() - array.null_count();
-    int64_t size = non_null_count * sizeof(uint16_t);
-    scratch_buffer_ = AllocateBuffer(ctx->memory_pool, size);
-    scratch_ = reinterpret_cast<uint16_t*>(scratch_buffer_->mutable_data());
-  }
-
-  std::shared_ptr<ResizableBuffer> scratch_buffer_;
-  uint16_t* scratch_;
 };
 
 template <>

From 2a45f290ced8179dffb9040e3ba5ad8ef6a1d3ac Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 31 Aug 2023 15:41:10 -0400
Subject: [PATCH 23/37] Tweak Arrow/Parquet schema tests

---
 cpp/src/parquet/arrow/arrow_schema_test.cc | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc
index a1cc989ba8ea0..5443214f930d7 100644
--- a/cpp/src/parquet/arrow/arrow_schema_test.cc
+++ b/cpp/src/parquet/arrow/arrow_schema_test.cc
@@ -236,6 +236,8 @@ TEST_F(TestConvertParquetSchema, ParquetAnnotatedFields) {
        ::arrow::fixed_size_binary(12)},
       {"uuid", LogicalType::UUID(), ParquetType::FIXED_LEN_BYTE_ARRAY, 16,
        ::arrow::fixed_size_binary(16)},
+      {"float16", LogicalType::Float16(), ParquetType::FIXED_LEN_BYTE_ARRAY, 2,
+       ::arrow::float16()},
       {"none", LogicalType::None(), ParquetType::BOOLEAN, -1, ::arrow::boolean()},
       {"none", LogicalType::None(), ParquetType::INT32, -1, ::arrow::int32()},
       {"none", LogicalType::None(), ParquetType::INT64, -1, ::arrow::int64()},
@@ -908,6 +910,23 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
   // ASSERT_NO_FATAL_FAILURE();
 }
 
+TEST_F(TestConvertArrowSchema, ArrowNonconvertibleFields) {
+  struct FieldConstructionArguments {
+    std::string name;
+    std::shared_ptr<::arrow::DataType> datatype;
+  };
+
+  std::vector<FieldConstructionArguments> cases = {
+      {"run_end_encoded",
+       ::arrow::run_end_encoded(::arrow::int32(), ::arrow::list(::arrow::int8()))},
+  };
+
+  for (const FieldConstructionArguments& c : cases) {
+    auto field = ::arrow::field(c.name, c.datatype);
+    ASSERT_RAISES(NotImplemented, ConvertSchema({field}));
+  }
+}
+
 TEST_F(TestConvertArrowSchema, ParquetFlatPrimitivesAsDictionaries) {
   std::vector<NodePtr> parquet_fields;
   std::vector<std::shared_ptr<Field>> arrow_fields;

From d340a827814ec5b732a90ed77291c93347e36ce6 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 31 Aug 2023 15:42:04 -0400
Subject: [PATCH 24/37] Support `util::Float16` in `random_real`

---
 cpp/src/arrow/testing/random.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index cbdac3baa0109..32ae97c11bfa2 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -28,6 +28,7 @@
 #include "arrow/testing/uniform_real.h"
 #include "arrow/testing/visibility.h"
 #include "arrow/type.h"
+#include "arrow/util/float16.h"
 
 namespace arrow {
 
@@ -644,10 +645,20 @@ void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
 template <typename T, typename U>
 void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
                  std::vector<U>* out) {
+  using util::Float16;
+
   std::default_random_engine gen(seed);
-  ::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
-  out->resize(n, static_cast<T>(0));
-  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
+  out->resize(n, static_cast<U>(T{0}));
+  if constexpr (std::is_same_v<T, Float16>) {
+    ::arrow::random::uniform_real_distribution<float> d(min_value.ToFloat(),
+                                                        max_value.ToFloat());
+    std::generate(out->begin(), out->end(),
+                  [&d, &gen] { return static_cast<U>(Float16::FromFloat(d(gen))); });
+  } else {
+    ::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
+    std::generate(out->begin(), out->end(),
+                  [&d, &gen] { return static_cast<U>(d(gen)); });
+  }
 }
 
 template <typename T, typename U>

From 6285af1da5be01e6b5e5abb528785d28f7e6f221 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 31 Aug 2023 15:43:11 -0400
Subject: [PATCH 25/37] Update Arrow reader/writer tests

---
 .../parquet/arrow/arrow_reader_writer_test.cc |  6 +--
 cpp/src/parquet/arrow/test_util.h             | 38 ++++---------------
 2 files changed, 10 insertions(+), 34 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 8f1c64b81322b..fb9e53870583c 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -926,11 +926,11 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) {
 
 TYPED_TEST(TestParquetIO, SingleColumnOptionalDictionaryWrite) {
   switch (TypeParam::type_id) {
-    // Skip tests for BOOL as we don't create dictionaries for it.
     case ::arrow::Type::BOOL:
-    // Skip tests for HALF_FLOAT as it's not currently supported by `dictionary_encode`
+      GTEST_SKIP() << "dictionaries not created for BOOL";
+      break;
     case ::arrow::Type::HALF_FLOAT:
-      GTEST_SKIP();
+      GTEST_SKIP() << "dictionary_encode not supported for HALF_FLOAT";
       break;
     default:
       break;
diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h
index 74b8a36df8592..bd9b3ffe24c46 100644
--- a/cpp/src/parquet/arrow/test_util.h
+++ b/cpp/src/parquet/arrow/test_util.h
@@ -33,6 +33,7 @@
 #include "arrow/type_fwd.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"
+#include "arrow/util/float16.h"
 #include "parquet/column_reader.h"
 
 namespace parquet {
@@ -65,40 +66,15 @@ struct Decimal256WithPrecisionAndScale {
   static constexpr int32_t scale = PRECISION - 1;
 };
 
-inline std::vector<uint16_t> RandomHalfFloatValues(size_t size, uint16_t min,
-                                                   uint16_t max) {
-  auto to_signed = [](uint16_t in) -> int16_t {
-    // Clamp magnitude to exclude representations of NaN/infinity. Within this range,
-    // binary float16s have the same ordering as int16s after conversion.
-    int16_t out = static_cast<int16_t>(std::max(in & 0x7fff, 0x7bff));
-    // Negate if sign bit is set
-    return (in & 0x8000) != 0 ? -out : out;
-  };
-  auto to_unsigned = [](int16_t in) -> uint16_t {
-    uint16_t out = static_cast<uint16_t>(std::abs(in));
-    // Set sign bit if negative
-    return in < 0 ? (out | 0x8000) : out;
-  };
-
-  const auto signed_min = to_signed(min);
-  const auto signed_max = to_signed(max);
-  std::vector<int16_t> signed_values;
-  ::arrow::randint(size, signed_min, signed_max, &signed_values);
-
-  std::vector<uint16_t> values(signed_values.size());
-  std::transform(signed_values.begin(), signed_values.end(), values.begin(), to_unsigned);
-  return values;
-}
-
 template <class ArrowType>
 ::arrow::enable_if_floating_point<ArrowType, Status> NonNullArray(
     size_t size, std::shared_ptr<Array>* out) {
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
   if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
-    constexpr uint16_t min = 0x0000;  // 0.0
-    constexpr uint16_t max = 0x3c00;  // 1.0
-    values = RandomHalfFloatValues(size, min, max);
+    using ::arrow::util::Float16;
+    ::arrow::random_real(size, 0, Float16::FromFloat(0.0f), Float16::FromFloat(1.0f),
+                         &values);
   } else {
     ::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1),
                          &values);
@@ -234,9 +210,9 @@ ::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
   if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
-    constexpr uint16_t min = 0xf0e2;  // -1e4
-    constexpr uint16_t max = 0x70e2;  // +1e4
-    values = RandomHalfFloatValues(size, min, max);
+    using ::arrow::util::Float16;
+    ::arrow::random_real(size, seed, Float16::FromFloat(-1e4f), Float16::FromFloat(1e4f),
+                         &values);
   } else {
     ::arrow::random_real(size, seed, static_cast<c_type>(-1e10),
                          static_cast<c_type>(1e10), &values);

From 554de9deeb1feb8d5590395087e9d54d6298d453 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 31 Aug 2023 15:44:25 -0400
Subject: [PATCH 26/37] Add To/FromDouble methods to `Float16`

---
 cpp/src/arrow/util/float16.cc | 141 ++++++++++++++++++++++------------
 cpp/src/arrow/util/float16.h  |  11 ++-
 2 files changed, 100 insertions(+), 52 deletions(-)

diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
index 5bdcdfab7ec49..0e9dfd820cafb 100644
--- a/cpp/src/arrow/util/float16.cc
+++ b/cpp/src/arrow/util/float16.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <ostream>
+#include <type_traits>
 
 #include "arrow/util/float16.h"
 #include "arrow/util/ubsan.h"
@@ -40,28 +41,54 @@ namespace {
 // | exp bias  | 15      | 127     | 1023    |
 // |-----------------------------------------|
 
-// Converts a IEEE binary32 into a binary16. Rounds to nearest with ties to zero
-uint16_t Binary32BitsToBinary16Bits(uint32_t f_bits) {
+template <typename T>
+struct BinaryConverter {
+  static_assert(std::is_same_v<T, uint32_t> || std::is_same_v<T, uint64_t>);
+
+  static constexpr int kNumBits = sizeof(T) * 8;
+  static constexpr int kMantNumBits = (kNumBits == 32) ? 23 : 52;
+  static constexpr int kExpNumBits = kNumBits - kMantNumBits - 1;
+
+  static constexpr int kExpBias = (1 << (kExpNumBits - 1)) - 1;
+
+  static constexpr T kMantMask = (T(1) << kMantNumBits) - 1;
+  static constexpr T kExpMask = ((T(1) << kExpNumBits) - 1) << kMantNumBits;
+  static constexpr T kSignMask = T(1) << (kNumBits - 1);
+
+  static_assert(kMantNumBits + kExpNumBits + 1 == kNumBits);
+  static_assert(kSignMask + kExpMask + kMantMask == ~T(0));
+
+  static uint16_t ToBinary16(T);
+  static T FromBinary16(uint16_t);
+};
+
+// Converts an IEEE binary32/64 into a binary16. Rounds to nearest with ties to even
+template <typename T>
+uint16_t BinaryConverter<T>::ToBinary16(T f_bits) {
   // Sign mask for output binary16
-  const uint16_t h_sign = uint16_t((f_bits >> 16) & 0x8000);
+  const uint16_t h_sign = uint16_t((f_bits >> (kNumBits - 16)) & 0x8000);
 
-  // Exponent mask for input binary32
-  const uint32_t f_exp = f_bits & 0x7f800000u;
+  // Exponent mask for input binary
+  const T f_exp = f_bits & kExpMask;
   // Exponents as signed pre-shifted values for convenience. Here, we need to re-bias the
-  // binary32 exponent for a binary16. If, after re-biasing, the binary16 exponent falls
-  // outside of the range [1,30] then we need to handle the under/overflow case specially.
-  const int16_t f_biased_exp = int16_t(f_exp >> 23);
-  const int16_t unbiased_exp = f_biased_exp - 127;
-  const int16_t h_biased_exp = unbiased_exp + 15;
+  // exponent for a binary16. If, after re-biasing, the binary16 exponent falls outside of
+  // the range [1,30] then we need to handle the under/overflow case specially.
+  const int32_t f_biased_exp = int32_t(f_exp >> kMantNumBits);
+  const int32_t unbiased_exp = f_biased_exp - kExpBias;
+  const int32_t h_biased_exp = unbiased_exp + 15;
 
-  // Mantissa mask for input binary32
-  const uint32_t f_mant = f_bits & 0x007fffffu;
+  // Mantissa mask for input
+  const T f_mant = f_bits & kMantMask;
+
+  // We define a "rounding bit", which is the most significant bit to be dropped
+  // (e.g. for a binary32, 0x1000).
+  constexpr T rounding_bit = T(1) << (kMantNumBits - (10 + 1));
 
   // Handle exponent overflow, NaN, and +/-Inf
   if (h_biased_exp >= 0x1f) {
-    // The binary32 is a NaN representation
+    // The input is a NaN representation
     if (f_biased_exp == 0xff && f_mant != 0) {
-      uint16_t h_mant = uint16_t(f_mant >> 13);
+      uint16_t h_mant = uint16_t(f_mant >> (kMantNumBits - 10));
       // If the mantissa bit(s) indicating NaN were shifted out, add one back. Otherwise,
       // the result would be infinity.
       if (h_mant == 0) {
@@ -77,16 +104,16 @@ uint16_t Binary32BitsToBinary16Bits(uint32_t f_bits) {
   // Handle exponent underflow, subnormals, and +/-0
   if (h_biased_exp <= 0) {
     // If the underflow exceeds the number of bits in a binary16 mantissa (10) then we
-    // can't round, so just clamp to 0. Note that this also weeds out any binary32 values
+    // can't round, so just clamp to 0. Note that this also weeds out any input values
     // that are subnormal - including +/-0;
     if (h_biased_exp < -10) {
       return h_sign;
     }
 
     // Convert to a rounded subnormal value starting with the mantissa. Since the input
-    // binary32 is known to be normal at this point, we need to prepend its implicit
-    // leading bit - which also necessitates an additional right-shift.
-    uint32_t rounded_mant = 0x800000u | f_mant;
+    // value is known to be normal at this point, we need to prepend its implicit leading
+    // bit - which also necessitates an additional right-shift.
+    T rounded_mant = (T(1) << kMantNumBits) | f_mant;
     rounded_mant >>= (1 - h_biased_exp);
 
     // Here, we implement rounding to nearest (with ties to even)
@@ -95,41 +122,41 @@ uint16_t Binary32BitsToBinary16Bits(uint32_t f_bits) {
     //  - The lower 13 bits, which will be shifted out
     //  - The upper 10 bits, which will become the binary16's mantissa
     //
-    // We define a "rounding bit", which is the most significant bit to be dropped
-    // (0x1000). "Rounding to nearest" basically just means that we add 1 to the rounding
-    // bit. If it's set, then the bit will cascade upwards into the 10-bit mantissa (and
-    // potentially the exponent).
-    //
-    // The only time where we may NOT do this is when a "tie" occurs - i.e. when the
-    // rounding bit is set but all of the lower bits are 0. In that case, we don't add 1
-    // if the retained mantissa is "even" (its least significant bit is 0).
-    if ((rounded_mant & 0x3fffu) != 0x1000u || (f_mant & 0x7ffu) != 0) {
-      rounded_mant += 0x1000u;
+    // "Rounding to nearest" basically just means that we add 1 to the rounding bit. If
+    // it's set, then the bit will cascade upwards into the 10-bit mantissa (and
+    // potentially the exponent). The only time where we may NOT do this is when a "tie"
+    // occurs - i.e. when the rounding bit is set but all of the lower bits are 0. In that
+    // case, we don't add 1 if the retained mantissa is "even" (its least significant bit
+    // is 0).
+    if ((rounded_mant & ((rounding_bit << 2) - 1)) != rounding_bit ||
+        (f_mant & 0x7ffu) != 0) {
+      rounded_mant += rounding_bit;
     }
 
-    const uint16_t h_mant = uint16_t(rounded_mant >> 13);
+    const uint16_t h_mant = uint16_t(rounded_mant >> (kMantNumBits - 10));
     return h_sign + h_mant;
   }
 
   const uint16_t h_exp = uint16_t(h_biased_exp) << 10;
 
   // See comment on rounding behavior above
-  uint32_t rounded_mant = f_mant;
-  if ((rounded_mant & 0x3fffu) != 0x1000u) {
-    rounded_mant += 0x1000u;
+  T rounded_mant = f_mant;
+  if ((rounded_mant & ((rounding_bit << 2) - 1)) != rounding_bit) {
+    rounded_mant += rounding_bit;
   }
 
-  const uint16_t h_mant = uint16_t(rounded_mant >> 13);
+  const uint16_t h_mant = uint16_t(rounded_mant >> (kMantNumBits - 10));
   // Note that we ADD (rather than OR) the components because we want the carryover bit
   // from rounding the mantissa to cascade through the exponent (it shouldn't affect the
   // sign bit though).
   return h_sign + h_exp + h_mant;
 }
 
-// Converts a IEEE binary16 into a binary32
-uint32_t Binary16BitsToBinary32Bits(uint16_t h_bits) {
-  // Sign mask for output binary32
-  const uint32_t f_sign = uint32_t(h_bits & 0x8000u) << 16;
+// Converts an IEEE binary16 into a binary32/64
+template <typename T>
+T BinaryConverter<T>::FromBinary16(uint16_t h_bits) {
+  // Sign mask for output
+  const T f_sign = T(h_bits & 0x8000u) << (kNumBits - 16);
 
   // Exponent mask for input binary16
   const uint16_t h_exp = h_bits & 0x7c00;
@@ -139,46 +166,58 @@ uint32_t Binary16BitsToBinary32Bits(uint16_t h_bits) {
   switch (h_exp) {
     // Handle Inf and NaN
     case 0x7c00u:
-      return f_sign | 0x7f800000u | (uint32_t(h_mant) << 13);
+      return f_sign | kExpMask | (T(h_mant) << (kMantNumBits - 10));
     // Handle zeros and subnormals
     case 0x0000u: {
       // Input is +/-0
       if (h_mant == 0) {
         return f_sign;
       }
-      // Subnormal binary16 to normal binary32
+      // Subnormal binary16 to normal binary32/64
       //
-      // Start with an f32-biased exponent of 2^-15. We then decrement it until the most
-      // significant set bit is left-shifted out - as it doesn't get explicitly stored in
-      // normalized floating point values. Instead, its existence is implied by the new
-      // exponent.
-      uint32_t f_exp = 127 - 15;
-      uint32_t f_mant = uint32_t(h_mant) << 1;
+      // Start with an f32/64-biased exponent of 2^-15. We then decrement it until the
+      // most significant set bit is left-shifted out - as it doesn't get explicitly
+      // stored in normalized floating point values. Instead, its existence is implied by
+      // the new exponent.
+      T f_exp = kExpBias - 15;
+      T f_mant = T(h_mant) << 1;
       while ((f_mant & 0x0400u) == 0) {
         --f_exp;
         f_mant <<= 1;
       }
-      f_exp <<= 23;
-      f_mant = (f_mant & 0x03ffu) << 13;
+      f_exp <<= kMantNumBits;
+      f_mant = (f_mant & 0x03ffu) << (kMantNumBits - 10);
       return f_sign | f_exp | f_mant;
     } break;
     // Handle normals
     default:
-      // Equivalent to adding (127 - 15) to the exponent and shifting everything by 13.
-      return f_sign | ((uint32_t(h_bits & 0x7fffu) + 0x1c000u) << 13);
+      // Equivalent to rebiasing the exponent and shifting everything by the remaining
+      // mantissa bits.
+      return f_sign |
+             ((T(h_bits & 0x7fffu) + (T(kExpBias - 15) << 10)) << (kMantNumBits - 10));
   }
 }
 
 }  // namespace
 
 float Float16::ToFloat() const {
-  const uint32_t f_bits = Binary16BitsToBinary32Bits(value_);
+  const uint32_t f_bits = BinaryConverter<uint32_t>::FromBinary16(value_);
   return SafeCopy<float>(f_bits);
 }
 
 Float16 Float16::FromFloat(float f) {
   const uint32_t f_bits = SafeCopy<uint32_t>(f);
-  return Float16{Binary32BitsToBinary16Bits(f_bits)};
+  return Float16{BinaryConverter<uint32_t>::ToBinary16(f_bits)};
+}
+
+double Float16::ToDouble() const {
+  const uint64_t d_bits = BinaryConverter<uint64_t>::FromBinary16(value_);
+  return SafeCopy<double>(d_bits);
+}
+
+Float16 Float16::FromDouble(double d) {
+  const uint64_t d_bits = SafeCopy<uint64_t>(d);
+  return Float16{BinaryConverter<uint64_t>::ToBinary16(d_bits)};
 }
 
 std::ostream& operator<<(std::ostream& os, Float16 arg) { return (os << arg.ToFloat()); }
diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index 7c8597a8ec542..7ae3b31767518 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -39,12 +39,18 @@ namespace util {
 /// - bit 15:     sign
 ///
 class ARROW_EXPORT Float16 {
+  constexpr static uint16_t ToBits(uint16_t bits) { return bits; }
  public:
   Float16() = default;
-  constexpr explicit Float16(uint16_t value) : value_(value) {}
+  // constexpr explicit Float16(uint16_t value) : value_(value) {}
+
+  template <typename T, typename std::enable_if_t<std::is_integral_v<T>>* = nullptr>
+  constexpr explicit Float16(T value) : value_(ToBits(value)) {}
 
   /// \brief Create a `Float16` from a 32-bit float (may lose precision)
   static Float16 FromFloat(float f);
+  /// \brief Create a `Float16` from a 64-bit float (may lose precision)
+  static Float16 FromDouble(double d);
 
   /// \brief Read a `Float16` from memory in native-endian byte order
   static Float16 FromBytes(const uint8_t* src) {
@@ -66,6 +72,7 @@ class ARROW_EXPORT Float16 {
   constexpr explicit operator uint16_t() const { return bits(); }
 
   explicit operator float() const { return ToFloat(); }
+  explicit operator double() const { return ToDouble(); }
 
   /// \brief Return true if the value is negative (sign bit is set)
   constexpr bool signbit() const { return (value_ & 0x8000) != 0; }
@@ -83,6 +90,8 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Convert to a 32-bit float
   float ToFloat() const;
+  /// \brief Convert to a 64-bit float
+  double ToDouble() const;
 
   /// \brief Copy the value's bytes in native-endian byte order
   void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); }

From 40e58f5599e6d2d1168bed0db928366c0333e221 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Mon, 4 Sep 2023 23:07:17 -0400
Subject: [PATCH 27/37] Add tests for `double` conversions

---
 cpp/src/arrow/util/float16.cc      |   2 +-
 cpp/src/arrow/util/float16_test.cc | 197 ++++++++++++++++++++---------
 2 files changed, 141 insertions(+), 58 deletions(-)

diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
index 0e9dfd820cafb..9bf002734c5fa 100644
--- a/cpp/src/arrow/util/float16.cc
+++ b/cpp/src/arrow/util/float16.cc
@@ -87,7 +87,7 @@ uint16_t BinaryConverter<T>::ToBinary16(T f_bits) {
   // Handle exponent overflow, NaN, and +/-Inf
   if (h_biased_exp >= 0x1f) {
     // The input is a NaN representation
-    if (f_biased_exp == 0xff && f_mant != 0) {
+    if (f_exp == kExpMask && f_mant != 0) {
       uint16_t h_mant = uint16_t(f_mant >> (kMantNumBits - 10));
       // If the mantissa bit(s) indicating NaN were shifted out, add one back. Otherwise,
       // the result would be infinity.
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index d69bf6954ce77..64d573e0d4aa0 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -18,32 +18,102 @@
 #include <array>
 #include <cmath>
 #include <utility>
-#include <vector>
 
 #include <gtest/gtest.h>
 
 #include "arrow/testing/gtest_util.h"
 #include "arrow/util/endian.h"
 #include "arrow/util/float16.h"
+#include "arrow/util/span.h"
 #include "arrow/util/ubsan.h"
 
-namespace arrow {
-namespace util {
+namespace arrow::util {
 namespace {
 
 template <typename T>
 using Limits = std::numeric_limits<T>;
 
 float F32(uint32_t bits) { return SafeCopy<float>(bits); }
+double F64(uint64_t bits) { return SafeCopy<double>(bits); }
 
-TEST(Float16Test, RoundTripFromFloat32) {
-  struct TestCase {
-    float f32;
-    uint16_t b16;
-    float f16_as_f32;
+Float16 ToFloat16(float f32) { return Float16::FromFloat(f32); }
+Float16 ToFloat16(double f64) { return Float16::FromDouble(f64); }
+
+template <typename T>
+class Float16ConversionTest : public ::testing::Test {
+ public:
+  struct RoundTripTestCase {
+    T input;
+    uint16_t bits;
+    T output;
   };
+
+  static void TestRoundTrip(span<const RoundTripTestCase> test_cases) {
+    for (size_t index = 0; index < test_cases.size(); ++index) {
+      ARROW_SCOPED_TRACE("i=", index);
+      const auto& tc = test_cases[index];
+
+      const auto f16 = ToFloat16(tc.input);
+      EXPECT_EQ(tc.bits, f16.bits());
+      EXPECT_EQ(tc.output, static_cast<T>(f16));
+
+      EXPECT_EQ(std::signbit(tc.output), f16.signbit());
+      EXPECT_EQ(std::isnan(tc.output), f16.is_nan());
+      EXPECT_EQ(std::isinf(tc.output), f16.is_infinity());
+      EXPECT_EQ(std::isfinite(tc.output), f16.is_finite());
+    }
+  }
+
+  static void TestRoundTripFromNaN(span<const T> test_cases) {
+    for (size_t i = 0; i < test_cases.size(); ++i) {
+      ARROW_SCOPED_TRACE("i=", i);
+      const auto input = test_cases[i];
+
+      ASSERT_TRUE(std::isnan(input));
+      const bool sign = std::signbit(input);
+
+      const Float16 f16 = ToFloat16(input);
+      EXPECT_TRUE(f16.is_nan());
+      EXPECT_EQ(std::isinf(input), f16.is_infinity());
+      EXPECT_EQ(std::isfinite(input), f16.is_finite());
+      EXPECT_EQ(sign, f16.signbit());
+
+      const auto output = static_cast<T>(f16);
+      EXPECT_TRUE(std::isnan(output));
+      EXPECT_EQ(sign, std::signbit(output));
+    }
+  }
+
+  void TestRoundTripFromInf() {
+    const T test_cases[] = {+Limits<T>::infinity(), -Limits<T>::infinity()};
+
+    for (size_t i = 0; i < std::size(test_cases); ++i) {
+      ARROW_SCOPED_TRACE("i=", i);
+      const auto input = test_cases[i];
+
+      ASSERT_TRUE(std::isinf(input));
+      const bool sign = std::signbit(input);
+
+      const Float16 f16 = ToFloat16(input);
+      EXPECT_TRUE(f16.is_infinity());
+      EXPECT_EQ(std::isfinite(input), f16.is_finite());
+      EXPECT_EQ(std::isnan(input), f16.is_nan());
+      EXPECT_EQ(sign, f16.signbit());
+
+      const auto output = static_cast<T>(f16);
+      EXPECT_TRUE(std::isinf(output));
+      EXPECT_EQ(sign, std::signbit(output));
+    }
+  }
+
+  void TestRoundTrip();
+  void TestRoundTripFromNaN();
+};
+
+template <>
+void Float16ConversionTest<float>::TestRoundTrip() {
   // Expected values were also manually validated with numpy-1.24.3
-  const TestCase test_cases[] = {
+  const RoundTripTestCase test_cases[] = {
       // +/-0.0f
       {F32(0x80000000u), 0b1000000000000000u, -0.0f},
       {F32(0x00000000u), 0b0000000000000000u, +0.0f},
@@ -71,63 +141,77 @@ TEST(Float16Test, RoundTripFromFloat32) {
       {F32(0x477fd001u), 0b0111101111111111u, 65504.0f},
       // 32-bit exp is 127 => 2^0, rounds to 16-bit exp of 16 => 2^1.
       {F32(0xbffff000u), 0b1100000000000000u, -2.0f},
+      // Extreme values should safely clamp to +/-inf
+      {Limits<float>::max(), 0b0111110000000000u, +Limits<float>::infinity()},
+      {Limits<float>::lowest(), 0b1111110000000000u, -Limits<float>::infinity()},
   };
 
-  for (size_t index = 0; index < std::size(test_cases); ++index) {
-    ARROW_SCOPED_TRACE("index=", index);
-    const auto& tc = test_cases[index];
-    const auto f16 = Float16::FromFloat(tc.f32);
-    EXPECT_EQ(tc.b16, f16.bits());
-    EXPECT_EQ(tc.f16_as_f32, f16.ToFloat());
-
-    EXPECT_EQ(std::signbit(tc.f16_as_f32), f16.signbit());
-    EXPECT_EQ(std::isnan(tc.f16_as_f32), f16.is_nan());
-    EXPECT_EQ(std::isinf(tc.f16_as_f32), f16.is_infinity());
-    EXPECT_EQ(std::isfinite(tc.f16_as_f32), f16.is_finite());
-  }
+  TestRoundTrip(span(test_cases, std::size(test_cases)));
 }
 
-TEST(Float16Test, RoundTripFromFloat32Nan) {
-  const float nan_test_cases[] = {
-      Limits<float>::quiet_NaN(), F32(0x7f800001u), F32(0xff800001u), F32(0x7fc00000u),
-      F32(0xff800001u),           F32(0x7fffffffu), F32(0xffffffffu)};
-
-  for (size_t i = 0; i < std::size(nan_test_cases); ++i) {
-    ARROW_SCOPED_TRACE("i=", i);
-    const auto f32 = nan_test_cases[i];
-
-    ASSERT_TRUE(std::isnan(f32));
-    const bool sign = std::signbit(f32);
-
-    const auto f16 = Float16::FromFloat(f32);
-    EXPECT_TRUE(f16.is_nan());
-    EXPECT_EQ(sign, f16.signbit());
+template <>
+void Float16ConversionTest<double>::TestRoundTrip() {
+  // Expected values were also manually validated with numpy-1.24.3
+  const RoundTripTestCase test_cases[] = {
+      // +/-0.0
+      {F64(0x8000000000000000u), 0b1000000000000000u, -0.0},
+      {F64(0x0000000000000000u), 0b0000000000000000u, +0.0},
+      // 64-bit exp is 998 => 2^-25. Rounding to nearest.
+      {F64(0xbe60000000000001u), 0b1000000000000001u, -5.9604644775390625e-8},
+      // 64-bit exp is 998 => 2^-25. Rounding to even.
+      {F64(0xbe60000000000000u), 0b1000000000000000u, -0.0},
+      // 64-bit exp is 997 => 2^-26. Underflow to zero.
+      {F64(0xbe50000000000001u), 0b1000000000000000u, -0.0},
+      // 64-bit exp is 1004 => 2^-19.
+      {F64(0xbec3400000000000u), 0b1000000000100110u, -2.2649765014648438e-6},
+      // 64-bit exp is 1004 => 2^-19.
+      {F64(0xbec3c00000000000u), 0b1000000000101000u, -2.3841857910156250e-6},
+      // 64-bit exp is 1008 => 2^-15. Rounding to nearest.
+      {F64(0xbf0ff40000000001u), 0b1000001111111111u, -6.0975551605224609e-5},
+      // 64-bit exp is 1008 => 2^-15. Rounds to 16-bit exp of 1 => 2^-14
+      {F64(0xbf0ffc0000000001u), 0b1000010000000000u, -6.1035156250000000e-5},
+      // 64-bit exp is 1038 => 2^15. Rounding to nearest.
+      {F64(0xc0e0020000000001u), 0b1111100000000001u, -32800.0},
+      // 64-bit exp is 1038 => 2^15. Rounding to even.
+      {F64(0xc0e0020000000000u), 0b1111100000000000u, -32768.0},
+      // 65520.0 rounds to inf
+      {F64(0x40effe0000000000u), 0b0111110000000000u, Limits<double>::infinity()},
+      // 65488.00000000001 rounds to 65504.0 (float16 max)
+      {F64(0x40effa0000000001u), 0b0111101111111111u, 65504.0},
+      // 64-bit exp is 1023 => 2^0, rounds to 16-bit exp of 16 => 2^1.
+      {F64(0xbffffe0000000000u), 0b1100000000000000u, -2.0},
+      // Extreme values should safely clamp to +/-inf
+      {Limits<double>::max(), 0b0111110000000000u, +Limits<double>::infinity()},
+      {Limits<double>::lowest(), 0b1111110000000000u, -Limits<double>::infinity()},
+  };
 
-    const auto f16_as_f32 = f16.ToFloat();
-    EXPECT_TRUE(std::isnan(f16_as_f32));
-    EXPECT_EQ(sign, std::signbit(f16_as_f32));
-  }
+  TestRoundTrip(span(test_cases, std::size(test_cases)));
 }
 
-TEST(Float16Test, RoundTripFromFloat32Inf) {
-  const float test_cases[] = {+Limits<float>::infinity(), -Limits<float>::infinity()};
+template <>
+void Float16ConversionTest<float>::TestRoundTripFromNaN() {
+  const float test_cases[] = {
+      Limits<float>::quiet_NaN(), F32(0x7f800001u), F32(0xff800001u), F32(0x7fc00000u),
+      F32(0xffc00000u),           F32(0x7fffffffu), F32(0xffffffffu)};
+  TestRoundTripFromNaN(span(test_cases, std::size(test_cases)));
+}
 
-  for (size_t i = 0; i < std::size(test_cases); ++i) {
-    ARROW_SCOPED_TRACE("i=", i);
-    const auto f32 = test_cases[i];
+template <>
+void Float16ConversionTest<double>::TestRoundTripFromNaN() {
+  const double test_cases[] = {Limits<double>::quiet_NaN(), F64(0x7ff0000000000001u),
+                               F64(0xfff0000000000001u),    F64(0x7ff8000000000000u),
+                               F64(0xfff8000000000000u),    F64(0x7fffffffffffffffu),
+                               F64(0xffffffffffffffffu)};
+  TestRoundTripFromNaN(span(test_cases, std::size(test_cases)));
+}
 
-    ASSERT_TRUE(std::isinf(f32));
-    const bool sign = std::signbit(f32);
+using NativeFloatTypes = ::testing::Types<float, double>;
 
-    const auto f16 = Float16::FromFloat(f32);
-    EXPECT_TRUE(f16.is_infinity());
-    EXPECT_EQ(sign, f16.signbit());
+TYPED_TEST_SUITE(Float16ConversionTest, NativeFloatTypes);
 
-    const auto f16_as_f32 = f16.ToFloat();
-    EXPECT_TRUE(std::isinf(f16_as_f32));
-    EXPECT_EQ(sign, std::signbit(f16_as_f32));
-  }
-}
+TYPED_TEST(Float16ConversionTest, RoundTrip) { this->TestRoundTrip(); }
+TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN(); }
+TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); }
 
 TEST(Float16Test, Compare) {
   constexpr float f32_inf = Limits<float>::infinity();
@@ -247,5 +331,4 @@ TEST(Float16Test, FromBytes) {
 }
 
 }  // namespace
-}  // namespace util
-}  // namespace arrow
+}  // namespace arrow::util

From 7407ce4c267fa70420f2ee6b4cc92651bfdab5fb Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Mon, 4 Sep 2023 23:11:19 -0400
Subject: [PATCH 28/37] Change misleading types

---
 cpp/src/arrow/util/float16.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
index 9bf002734c5fa..bc2b8e455d3bc 100644
--- a/cpp/src/arrow/util/float16.cc
+++ b/cpp/src/arrow/util/float16.cc
@@ -73,9 +73,9 @@ uint16_t BinaryConverter<T>::ToBinary16(T f_bits) {
   // Exponents as signed pre-shifted values for convenience. Here, we need to re-bias the
   // exponent for a binary16. If, after re-biasing, the binary16 exponent falls outside of
   // the range [1,30] then we need to handle the under/overflow case specially.
-  const int32_t f_biased_exp = int32_t(f_exp >> kMantNumBits);
-  const int32_t unbiased_exp = f_biased_exp - kExpBias;
-  const int32_t h_biased_exp = unbiased_exp + 15;
+  const int16_t f_biased_exp = int16_t(f_exp >> kMantNumBits);
+  const int16_t unbiased_exp = f_biased_exp - kExpBias;
+  const int16_t h_biased_exp = unbiased_exp + 15;
 
   // Mantissa mask for input
   const T f_mant = f_bits & kMantMask;

From bb4ca6a0b3400faea73aa1a1d716e48c088987a1 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Mon, 4 Sep 2023 23:32:48 -0400
Subject: [PATCH 29/37] Some `Float16` API changes

Reverted several prior changes that were accidentally pushed

Enabled construction from native floats

Removed `uint16_t` conversion operator since it doesn't behave
consistently with standard floats. As a result, rolled back some of the
prior changes to `random_real` used in the Parquet test utils
---
 cpp/src/arrow/testing/random.h     | 17 +++--------------
 cpp/src/arrow/util/float16.h       | 18 ++++++++++--------
 cpp/src/arrow/util/float16_test.cc | 26 ++++++++++++++++++++------
 cpp/src/parquet/arrow/test_util.h  | 22 ++++++++++++++++------
 4 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index 32ae97c11bfa2..cbdac3baa0109 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -28,7 +28,6 @@
 #include "arrow/testing/uniform_real.h"
 #include "arrow/testing/visibility.h"
 #include "arrow/type.h"
-#include "arrow/util/float16.h"
 
 namespace arrow {
 
@@ -645,20 +644,10 @@ void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
 template <typename T, typename U>
 void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
                  std::vector<U>* out) {
-  using util::Float16;
-
   std::default_random_engine gen(seed);
-  out->resize(n, static_cast<U>(T{0}));
-  if constexpr (std::is_same_v<T, Float16>) {
-    ::arrow::random::uniform_real_distribution<float> d(min_value.ToFloat(),
-                                                        max_value.ToFloat());
-    std::generate(out->begin(), out->end(),
-                  [&d, &gen] { return static_cast<U>(Float16::FromFloat(d(gen))); });
-  } else {
-    ::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
-    std::generate(out->begin(), out->end(),
-                  [&d, &gen] { return static_cast<U>(d(gen)); });
-  }
+  ::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
+  out->resize(n, static_cast<T>(0));
+  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
 }
 
 template <typename T, typename U>
diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index 7ae3b31767518..c9e4594c0cd5d 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -25,6 +25,7 @@
 #include <type_traits>
 
 #include "arrow/util/endian.h"
+#include "arrow/util/macros.h"
 #include "arrow/util/ubsan.h"
 #include "arrow/util/visibility.h"
 
@@ -39,18 +40,20 @@ namespace util {
 /// - bit 15:     sign
 ///
 class ARROW_EXPORT Float16 {
-  constexpr static uint16_t ToBits(uint16_t bits) { return bits; }
  public:
   Float16() = default;
-  // constexpr explicit Float16(uint16_t value) : value_(value) {}
+  constexpr explicit Float16(uint16_t value) : value_(value) {}
 
-  template <typename T, typename std::enable_if_t<std::is_integral_v<T>>* = nullptr>
-  constexpr explicit Float16(T value) : value_(ToBits(value)) {}
+  template <typename T, typename std::enable_if_t<std::is_floating_point_v<T>>* = NULLPTR>
+  explicit Float16(T f) : Float16(FromNative(f)) {}
 
   /// \brief Create a `Float16` from a 32-bit float (may lose precision)
   static Float16 FromFloat(float f);
   /// \brief Create a `Float16` from a 64-bit float (may lose precision)
   static Float16 FromDouble(double d);
+  /// \brief Create a `Float16` from a native floating-point value (may lose precision)
+  static Float16 FromNative(float f) { return FromFloat(f); }
+  static Float16 FromNative(double d) { return FromDouble(d); }
 
   /// \brief Read a `Float16` from memory in native-endian byte order
   static Float16 FromBytes(const uint8_t* src) {
@@ -69,10 +72,6 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Return the value's integer representation
   constexpr uint16_t bits() const { return value_; }
-  constexpr explicit operator uint16_t() const { return bits(); }
-
-  explicit operator float() const { return ToFloat(); }
-  explicit operator double() const { return ToDouble(); }
 
   /// \brief Return true if the value is negative (sign bit is set)
   constexpr bool signbit() const { return (value_ & 0x8000) != 0; }
@@ -93,6 +92,9 @@ class ARROW_EXPORT Float16 {
   /// \brief Convert to a 64-bit float
   double ToDouble() const;
 
+  explicit operator float() const { return ToFloat(); }
+  explicit operator double() const { return ToDouble(); }
+
   /// \brief Copy the value's bytes in native-endian byte order
   void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); }
   /// \brief Return the value's bytes in native-endian byte order
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index 64d573e0d4aa0..4e49532bdd6d3 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -36,9 +36,6 @@ using Limits = std::numeric_limits<T>;
 float F32(uint32_t bits) { return SafeCopy<float>(bits); }
 double F64(uint64_t bits) { return SafeCopy<double>(bits); }
 
-Float16 ToFloat16(float f32) { return Float16::FromFloat(f32); }
-Float16 ToFloat16(double f64) { return Float16::FromDouble(f64); }
-
 template <typename T>
 class Float16ConversionTest : public ::testing::Test {
  public:
@@ -53,7 +50,7 @@ class Float16ConversionTest : public ::testing::Test {
       ARROW_SCOPED_TRACE("i=", index);
       const auto& tc = test_cases[index];
 
-      const auto f16 = ToFloat16(tc.input);
+      const auto f16 = Float16(tc.input);
       EXPECT_EQ(tc.bits, f16.bits());
       EXPECT_EQ(tc.output, static_cast<T>(f16));
 
@@ -72,7 +69,7 @@ class Float16ConversionTest : public ::testing::Test {
       ASSERT_TRUE(std::isnan(input));
       const bool sign = std::signbit(input);
 
-      const Float16 f16 = ToFloat16(input);
+      const auto f16 = Float16(input);
       EXPECT_TRUE(f16.is_nan());
       EXPECT_EQ(std::isinf(input), f16.is_infinity());
       EXPECT_EQ(std::isfinite(input), f16.is_finite());
@@ -94,7 +91,7 @@ class Float16ConversionTest : public ::testing::Test {
       ASSERT_TRUE(std::isinf(input));
       const bool sign = std::signbit(input);
 
-      const Float16 f16 = ToFloat16(input);
+      const auto f16 = Float16(input);
       EXPECT_TRUE(f16.is_infinity());
       EXPECT_EQ(std::isfinite(input), f16.is_finite());
       EXPECT_EQ(std::isnan(input), f16.is_nan());
@@ -213,6 +210,23 @@ TYPED_TEST(Float16ConversionTest, RoundTrip) { this->TestRoundTrip(); }
 TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN(); }
 TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); }
 
+TEST(Float16Test, Constructors) {
+  constexpr auto from_int_0 = Float16(0);
+  constexpr auto from_int_1 = Float16(1);
+  const auto from_f32_0 = Float16(0.0f);
+  const auto from_f32_1 = Float16(1.0f);
+  const auto from_f64_0 = Float16(0.0);
+  const auto from_f64_1 = Float16(1.0);
+
+  ASSERT_EQ(0, from_int_0.bits());
+  ASSERT_EQ(0, from_f32_0.bits());
+  ASSERT_EQ(0, from_f64_0.bits());
+
+  ASSERT_EQ(1, from_int_1.bits());
+  ASSERT_EQ(0x3c00, from_f32_1.bits());
+  ASSERT_EQ(0x3c00, from_f64_1.bits());
+}
+
 TEST(Float16Test, Compare) {
   constexpr float f32_inf = Limits<float>::infinity();
   constexpr float f32_nan = Limits<float>::quiet_NaN();
diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h
index bd9b3ffe24c46..6036f47514eb1 100644
--- a/cpp/src/parquet/arrow/test_util.h
+++ b/cpp/src/parquet/arrow/test_util.h
@@ -66,15 +66,26 @@ struct Decimal256WithPrecisionAndScale {
   static constexpr int32_t scale = PRECISION - 1;
 };
 
+inline void RandomHalfFloatValues(int64_t n, uint32_t seed,
+                                  ::arrow::util::Float16 min_value,
+                                  ::arrow::util::Float16 max_value,
+                                  std::vector<uint16_t>* out) {
+  std::vector<float> values;
+  ::arrow::random_real(n, seed, static_cast<float>(min_value),
+                       static_cast<float>(max_value), &values);
+  out->resize(values.size());
+  std::transform(values.begin(), values.end(), out->begin(),
+                 [](float f) { return ::arrow::util::Float16(f).bits(); });
+}
+
 template <class ArrowType>
 ::arrow::enable_if_floating_point<ArrowType, Status> NonNullArray(
     size_t size, std::shared_ptr<Array>* out) {
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
   if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
-    using ::arrow::util::Float16;
-    ::arrow::random_real(size, 0, Float16::FromFloat(0.0f), Float16::FromFloat(1.0f),
-                         &values);
+    RandomHalfFloatValues(size, 0, ::arrow::util::Float16(0.0f),
+                          ::arrow::util::Float16(1.0f), &values);
   } else {
     ::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1),
                          &values);
@@ -210,9 +221,8 @@ ::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
   if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
-    using ::arrow::util::Float16;
-    ::arrow::random_real(size, seed, Float16::FromFloat(-1e4f), Float16::FromFloat(1e4f),
-                         &values);
+    RandomHalfFloatValues(size, seed, ::arrow::util::Float16(-1e4f),
+                          ::arrow::util::Float16(1e4f), &values);
   } else {
     ::arrow::random_real(size, seed, static_cast<c_type>(-1e10),
                          static_cast<c_type>(1e10), &values);

From 354f6f6e6eafa930579f296769b7e834978690de Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Tue, 5 Sep 2023 16:52:37 -0400
Subject: [PATCH 30/37] Refactor typed comparators

---
 cpp/src/parquet/statistics.cc | 39 +++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index 73caf2b46f555..0cea53d8f73bf 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -296,7 +296,8 @@ template <bool is_signed>
 struct CompareHelper<FLBAType, is_signed>
     : public BinaryLikeCompareHelperBase<FLBAType, is_signed> {};
 
-struct Float16CompareHelper {
+template <>
+struct CompareHelper<Float16LogicalType, /*is_signed=*/true> {
   using T = FLBA;
 
   static T DefaultMin() { return T{Float16Constants::max()}; }
@@ -412,12 +413,24 @@ optional<std::pair<ByteArray, ByteArray>> CleanStatistic(
   return min_max;
 }
 
-template <bool is_signed, typename DType,
-          typename HelperType = CompareHelper<DType, is_signed>>
-class TypedComparatorImpl : virtual public TypedComparator<DType> {
+template <typename T>
+struct RebindLogical {
+  using DType = T;
+  using c_type = typename DType::c_type;
+};
+
+template <>
+struct RebindLogical<Float16LogicalType> {
+  using DType = FLBAType;
+  using c_type = DType::c_type;
+};
+
+template <bool is_signed, typename DType>
+class TypedComparatorImpl
+    : virtual public TypedComparator<typename RebindLogical<DType>::DType> {
  public:
-  using T = typename DType::c_type;
-  using Helper = HelperType;
+  using T = typename RebindLogical<DType>::c_type;
+  using Helper = CompareHelper<DType, is_signed>;
 
   explicit TypedComparatorImpl(int type_length = -1) : type_length_(type_length) {}
 
@@ -464,7 +477,9 @@ class TypedComparatorImpl : virtual public TypedComparator<DType> {
     return {min, max};
   }
 
-  std::pair<T, T> GetMinMax(const ::arrow::Array& values) override;
+  std::pair<T, T> GetMinMax(const ::arrow::Array& values) override {
+    ParquetException::NYI(values.type()->ToString());
+  }
 
  private:
   int type_length_;
@@ -492,12 +507,6 @@ TypedComparatorImpl</*is_signed=*/false, Int32Type>::GetMinMax(const int32_t* va
   return {SafeCopy<int32_t>(min), SafeCopy<int32_t>(max)};
 }
 
-template <bool is_signed, typename DType, typename Helper>
-std::pair<typename DType::c_type, typename DType::c_type>
-TypedComparatorImpl<is_signed, DType, Helper>::GetMinMax(const ::arrow::Array& values) {
-  ParquetException::NYI(values.type()->ToString());
-}
-
 template <bool is_signed>
 std::pair<ByteArray, ByteArray> GetMinMaxBinaryHelper(
     const TypedComparatorImpl<is_signed, ByteArrayType>& comparator,
@@ -926,8 +935,8 @@ std::shared_ptr<Comparator> DoMakeComparator(Type::type physical_type,
         return std::make_shared<TypedComparatorImpl<true, ByteArrayType>>();
       case Type::FIXED_LEN_BYTE_ARRAY:
         if (logical_type == LogicalType::Type::FLOAT16) {
-          return std::make_shared<
-              TypedComparatorImpl<true, FLBAType, Float16CompareHelper>>(type_length);
+          return std::make_shared<TypedComparatorImpl<true, Float16LogicalType>>(
+              type_length);
         }
         return std::make_shared<TypedComparatorImpl<true, FLBAType>>(type_length);
       default:

From ea5f5dc031991f2c2474fb10a5cf48d7dae8c47c Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Tue, 5 Sep 2023 16:53:55 -0400
Subject: [PATCH 31/37] Add logical type to docs

---
 docs/source/cpp/parquet.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/source/cpp/parquet.rst b/docs/source/cpp/parquet.rst
index 23fca8fd73010..3e06352f5dde3 100644
--- a/docs/source/cpp/parquet.rst
+++ b/docs/source/cpp/parquet.rst
@@ -481,6 +481,8 @@ physical type.
 +-------------------+-----------------------------+----------------------------+---------+
 | MAP               | Any                         | Map                        | \(6)    |
 +-------------------+-----------------------------+----------------------------+---------+
+| FLOAT16           | FIXED_LENGTH_BYTE_ARRAY     | HalfFloat                  |         |
++-------------------+-----------------------------+----------------------------+---------+
 
 * \(1) On the write side, the Parquet physical type INT32 is generated.
 

From ab846302c4e2667627adfd979b9bfaca1e29ccb1 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 19 Oct 2023 14:14:44 -0400
Subject: [PATCH 32/37] Replace public `Float16(uint16_t)` constructor

---
 cpp/src/arrow/util/float16.cc      |  4 +-
 cpp/src/arrow/util/float16.h       | 40 +++++++++--------
 cpp/src/arrow/util/float16_test.cc | 70 ++++++++++++++----------------
 cpp/src/parquet/statistics.cc      |  8 ++--
 4 files changed, 59 insertions(+), 63 deletions(-)

diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
index bc2b8e455d3bc..873c2e1cea534 100644
--- a/cpp/src/arrow/util/float16.cc
+++ b/cpp/src/arrow/util/float16.cc
@@ -207,7 +207,7 @@ float Float16::ToFloat() const {
 
 Float16 Float16::FromFloat(float f) {
   const uint32_t f_bits = SafeCopy<uint32_t>(f);
-  return Float16{BinaryConverter<uint32_t>::ToBinary16(f_bits)};
+  return FromBits(BinaryConverter<uint32_t>::ToBinary16(f_bits));
 }
 
 double Float16::ToDouble() const {
@@ -217,7 +217,7 @@ double Float16::ToDouble() const {
 
 Float16 Float16::FromDouble(double d) {
   const uint64_t d_bits = SafeCopy<uint64_t>(d);
-  return Float16{BinaryConverter<uint64_t>::ToBinary16(d_bits)};
+  return FromBits(BinaryConverter<uint64_t>::ToBinary16(d_bits));
 }
 
 std::ostream& operator<<(std::ostream& os, Float16 arg) { return (os << arg.ToFloat()); }
diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index c9e4594c0cd5d..5ba39e62e4328 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -42,32 +42,32 @@ namespace util {
 class ARROW_EXPORT Float16 {
  public:
   Float16() = default;
-  constexpr explicit Float16(uint16_t value) : value_(value) {}
-
-  template <typename T, typename std::enable_if_t<std::is_floating_point_v<T>>* = NULLPTR>
-  explicit Float16(T f) : Float16(FromNative(f)) {}
-
+  explicit Float16(float f) : Float16(FromFloat(f)) {}
+  explicit Float16(double d) : Float16(FromDouble(d)) {}
+  template <typename T,
+            typename std::enable_if_t<std::is_convertible_v<T, double>>* = NULLPTR>
+  explicit Float16(T v) : Float16(static_cast<double>(v)) {}
+
+  /// \brief Create a `Float16` from its exact binary representation
+  constexpr static Float16 FromBits(uint16_t bits) { return Float16{bits, bool{}}; }
   /// \brief Create a `Float16` from a 32-bit float (may lose precision)
   static Float16 FromFloat(float f);
   /// \brief Create a `Float16` from a 64-bit float (may lose precision)
   static Float16 FromDouble(double d);
-  /// \brief Create a `Float16` from a native floating-point value (may lose precision)
-  static Float16 FromNative(float f) { return FromFloat(f); }
-  static Float16 FromNative(double d) { return FromDouble(d); }
 
   /// \brief Read a `Float16` from memory in native-endian byte order
   static Float16 FromBytes(const uint8_t* src) {
-    return Float16(SafeLoadAs<uint16_t>(src));
+    return FromBits(SafeLoadAs<uint16_t>(src));
   }
 
   /// \brief Read a `Float16` from memory in little-endian byte order
   static Float16 FromLittleEndian(const uint8_t* src) {
-    return Float16(::arrow::bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
+    return FromBits(::arrow::bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
   }
 
   /// \brief Read a `Float16` from memory in big-endian byte order
   static Float16 FromBigEndian(const uint8_t* src) {
-    return Float16(::arrow::bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
+    return FromBits(::arrow::bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
   }
 
   /// \brief Return the value's integer representation
@@ -108,7 +108,7 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Copy the value's bytes in little-endian byte order
   void ToLittleEndian(uint8_t* dest) const {
-    Float16{::arrow::bit_util::ToLittleEndian(value_)}.ToBytes(dest);
+    FromBits(::arrow::bit_util::ToLittleEndian(value_)).ToBytes(dest);
   }
   /// \brief Return the value's bytes in little-endian byte order
   constexpr std::array<uint8_t, 2> ToLittleEndian() const {
@@ -121,7 +121,7 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Copy the value's bytes in big-endian byte order
   void ToBigEndian(uint8_t* dest) const {
-    Float16{::arrow::bit_util::ToBigEndian(value_)}.ToBytes(dest);
+    FromBits(::arrow::bit_util::ToBigEndian(value_)).ToBytes(dest);
   }
   /// \brief Return the value's bytes in big-endian byte order
   constexpr std::array<uint8_t, 2> ToBigEndian() const {
@@ -132,8 +132,8 @@ class ARROW_EXPORT Float16 {
 #endif
   }
 
-  constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); }
-  constexpr Float16 operator+() const { return Float16(value_); }
+  constexpr Float16 operator-() const { return FromBits(value_ ^ 0x8000); }
+  constexpr Float16 operator+() const { return FromBits(value_); }
 
   friend constexpr bool operator==(Float16 lhs, Float16 rhs) {
     if (lhs.is_nan() || rhs.is_nan()) return false;
@@ -159,6 +159,8 @@ class ARROW_EXPORT Float16 {
   uint16_t value_;
 
  private:
+  constexpr Float16(uint16_t value, bool) : value_(value) {}
+
   // Comparison helpers that assume neither operand is NaN
   static constexpr bool CompareEq(Float16 lhs, Float16 rhs) {
     return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero());
@@ -197,11 +199,11 @@ class std::numeric_limits<arrow::util::Float16> {
   static constexpr bool has_infinity = true;
   static constexpr bool has_quiet_NaN = true;
 
-  static constexpr T min() { return T(0b0000010000000000); }
-  static constexpr T max() { return T(0b0111101111111111); }
+  static constexpr T min() { return T::FromBits(0b0000010000000000); }
+  static constexpr T max() { return T::FromBits(0b0111101111111111); }
   static constexpr T lowest() { return -max(); }
 
-  static constexpr T infinity() { return T(0b0111110000000000); }
+  static constexpr T infinity() { return T::FromBits(0b0111110000000000); }
 
-  static constexpr T quiet_NaN() { return T(0b0111111111111111); }
+  static constexpr T quiet_NaN() { return T::FromBits(0b0111111111111111); }
 };
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index 4e49532bdd6d3..dc8833d871baf 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -211,20 +211,14 @@ TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN
 TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); }
 
 TEST(Float16Test, Constructors) {
-  constexpr auto from_int_0 = Float16(0);
-  constexpr auto from_int_1 = Float16(1);
-  const auto from_f32_0 = Float16(0.0f);
-  const auto from_f32_1 = Float16(1.0f);
-  const auto from_f64_0 = Float16(0.0);
-  const auto from_f64_1 = Float16(1.0);
-
-  ASSERT_EQ(0, from_int_0.bits());
-  ASSERT_EQ(0, from_f32_0.bits());
-  ASSERT_EQ(0, from_f64_0.bits());
-
-  ASSERT_EQ(1, from_int_1.bits());
-  ASSERT_EQ(0x3c00, from_f32_1.bits());
-  ASSERT_EQ(0x3c00, from_f64_1.bits());
+  // Construction from exact bits
+  ASSERT_EQ(1, Float16::FromBits(1).bits());
+  // Construction from floating point (including implicit conversions)
+  int i = 0;
+  for (auto f16 : {Float16(1.0f), Float16(1.0), Float16(1)}) {
+    ARROW_SCOPED_TRACE("i=", i++);
+    ASSERT_EQ(0x3c00, f16.bits());
+  }
 }
 
 TEST(Float16Test, Compare) {
@@ -241,30 +235,30 @@ TEST(Float16Test, Compare) {
       {+Limits<Float16>::infinity(), +f32_inf},
       {-Limits<Float16>::infinity(), -f32_inf},
       // Multiple (semantically equivalent) NaN representations
-      {Float16(0x7e00), f32_nan},
-      {Float16(0xfe00), f32_nan},
-      {Float16(0x7fff), f32_nan},
-      {Float16(0xffff), f32_nan},
+      {Float16::FromBits(0x7e00), f32_nan},
+      {Float16::FromBits(0xfe00), f32_nan},
+      {Float16::FromBits(0x7fff), f32_nan},
+      {Float16::FromBits(0xffff), f32_nan},
       // Positive/negative zeros
-      {Float16(0x0000), +0.0f},
-      {Float16(0x8000), -0.0f},
+      {Float16::FromBits(0x0000), +0.0f},
+      {Float16::FromBits(0x8000), -0.0f},
       // Miscellaneous values. In general, they're chosen to test the sign/exponent and
       // exponent/mantissa boundaries
-      {Float16(0x101c), +0.00050163269043f},
-      {Float16(0x901c), -0.00050163269043f},
-      {Float16(0x101d), +0.000502109527588f},
-      {Float16(0x901d), -0.000502109527588f},
-      {Float16(0x121c), +0.00074577331543f},
-      {Float16(0x921c), -0.00074577331543f},
-      {Float16(0x141c), +0.00100326538086f},
-      {Float16(0x941c), -0.00100326538086f},
-      {Float16(0x501c), +32.875f},
-      {Float16(0xd01c), -32.875f},
+      {Float16::FromBits(0x101c), +0.00050163269043f},
+      {Float16::FromBits(0x901c), -0.00050163269043f},
+      {Float16::FromBits(0x101d), +0.000502109527588f},
+      {Float16::FromBits(0x901d), -0.000502109527588f},
+      {Float16::FromBits(0x121c), +0.00074577331543f},
+      {Float16::FromBits(0x921c), -0.00074577331543f},
+      {Float16::FromBits(0x141c), +0.00100326538086f},
+      {Float16::FromBits(0x941c), -0.00100326538086f},
+      {Float16::FromBits(0x501c), +32.875f},
+      {Float16::FromBits(0xd01c), -32.875f},
       // A few subnormals for good measure
-      {Float16(0x001c), +1.66893005371e-06f},
-      {Float16(0x801c), -1.66893005371e-06f},
-      {Float16(0x021c), +3.21865081787e-05f},
-      {Float16(0x821c), -3.21865081787e-05f},
+      {Float16::FromBits(0x001c), +1.66893005371e-06f},
+      {Float16::FromBits(0x801c), -1.66893005371e-06f},
+      {Float16::FromBits(0x021c), +3.21865081787e-05f},
+      {Float16::FromBits(0x821c), -3.21865081787e-05f},
   };
 
   auto expect_op = [&](std::string op_name, auto op) {
@@ -302,7 +296,7 @@ TEST(Float16Test, Compare) {
 }
 
 TEST(Float16Test, ToBytes) {
-  constexpr auto f16 = Float16(0xd01c);
+  constexpr auto f16 = Float16::FromBits(0xd01c);
   std::array<uint8_t, 2> bytes;
   auto load = [&bytes]() { return SafeLoadAs<uint16_t>(bytes.data()); };
 
@@ -334,10 +328,10 @@ TEST(Float16Test, ToBytes) {
 TEST(Float16Test, FromBytes) {
   constexpr uint16_t u16 = 0xd01c;
   const auto* data = reinterpret_cast<const uint8_t*>(&u16);
-  ASSERT_EQ(Float16::FromBytes(data), Float16(0xd01c));
+  ASSERT_EQ(Float16::FromBytes(data), Float16::FromBits(0xd01c));
 #if ARROW_LITTLE_ENDIAN
-  ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0xd01c));
-  ASSERT_EQ(Float16::FromBigEndian(data), Float16(0x1cd0));
+  ASSERT_EQ(Float16::FromLittleEndian(data), Float16::FromBits(0xd01c));
+  ASSERT_EQ(Float16::FromBigEndian(data), Float16::FromBits(0x1cd0));
 #else
-  ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0x1cd0));
-  ASSERT_EQ(Float16::FromBigEndian(data), Float16(0xd01c));
+  ASSERT_EQ(Float16::FromLittleEndian(data), Float16::FromBits(0x1cd0));
+  ASSERT_EQ(Float16::FromBigEndian(data), Float16::FromBits(0xd01c));
diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index 0cea53d8f73bf..37b245e0dd6c2 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -68,8 +68,8 @@ struct Float16Constants {
   static constexpr Bytes lowest_ =
       std::numeric_limits<Float16>::lowest().ToLittleEndian();
   static constexpr Bytes max_ = std::numeric_limits<Float16>::max().ToLittleEndian();
-  static constexpr Bytes positive_zero_ = (+Float16(0)).ToLittleEndian();
-  static constexpr Bytes negative_zero_ = (-Float16(0)).ToLittleEndian();
+  static constexpr Bytes positive_zero_ = (+Float16::FromBits(0)).ToLittleEndian();
+  static constexpr Bytes negative_zero_ = (-Float16::FromBits(0)).ToLittleEndian();
 };
 
 template <typename DType, bool is_signed>
@@ -384,10 +384,10 @@ optional<std::pair<FLBA, FLBA>> CleanFloat16Statistic(std::pair<FLBA, FLBA> min_
     return ::std::nullopt;
   }
 
-  if (min == Float16(0)) {
+  if (min.is_zero() && !min.signbit()) {
     min_flba = FLBA{Float16Constants::negative_zero()};
   }
-  if (max == -Float16(0)) {
+  if (max.is_zero() && max.signbit()) {
     max_flba = FLBA{Float16Constants::positive_zero()};
   }
 

From c8404bbafddf9237e8247161a122d3054adf9f88 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Thu, 19 Oct 2023 15:26:15 -0400
Subject: [PATCH 33/37] `Float16` tweaks, add constexpr tests

---
 cpp/src/arrow/util/float16.cc      |  4 ++--
 cpp/src/arrow/util/float16.h       | 38 ++++++++++++++----------------
 cpp/src/arrow/util/float16_test.cc | 25 ++++++++++++++++++++
 3 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
index 873c2e1cea534..5c8b3d10ca0cd 100644
--- a/cpp/src/arrow/util/float16.cc
+++ b/cpp/src/arrow/util/float16.cc
@@ -201,7 +201,7 @@ T BinaryConverter<T>::FromBinary16(uint16_t h_bits) {
 }  // namespace
 
 float Float16::ToFloat() const {
-  const uint32_t f_bits = BinaryConverter<uint32_t>::FromBinary16(value_);
+  const uint32_t f_bits = BinaryConverter<uint32_t>::FromBinary16(bits_);
   return SafeCopy<float>(f_bits);
 }
 
@@ -211,7 +211,7 @@ Float16 Float16::FromFloat(float f) {
 }
 
 double Float16::ToDouble() const {
-  const uint64_t d_bits = BinaryConverter<uint64_t>::FromBinary16(value_);
+  const uint64_t d_bits = BinaryConverter<uint64_t>::FromBinary16(bits_);
   return SafeCopy<double>(d_bits);
 }
 
diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index 5ba39e62e4328..888936797c870 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -70,22 +70,20 @@ class ARROW_EXPORT Float16 {
     return FromBits(::arrow::bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
   }
 
-  /// \brief Return the value's integer representation
-  constexpr uint16_t bits() const { return value_; }
+  /// \brief Return the value's binary representation as a `uint16_t`
+  constexpr uint16_t bits() const { return bits_; }
 
   /// \brief Return true if the value is negative (sign bit is set)
-  constexpr bool signbit() const { return (value_ & 0x8000) != 0; }
+  constexpr bool signbit() const { return (bits_ & 0x8000) != 0; }
 
   /// \brief Return true if the value is NaN
-  constexpr bool is_nan() const {
-    return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0;
-  }
+  constexpr bool is_nan() const { return (bits_ & 0x7fff) > 0x7c00; }
   /// \brief Return true if the value is positive/negative infinity
-  constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; }
+  constexpr bool is_infinity() const { return (bits_ & 0x7fff) == 0x7c00; }
   /// \brief Return true if the value is finite and not NaN
-  constexpr bool is_finite() const { return (value_ & 0x7c00) != 0x7c00; }
+  constexpr bool is_finite() const { return (bits_ & 0x7c00) != 0x7c00; }
   /// \brief Return true if the value is positive/negative zero
-  constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; }
+  constexpr bool is_zero() const { return (bits_ & 0x7fff) == 0; }
 
   /// \brief Convert to a 32-bit float
   float ToFloat() const;
@@ -96,7 +94,7 @@ class ARROW_EXPORT Float16 {
   explicit operator double() const { return ToDouble(); }
 
   /// \brief Copy the value's bytes in native-endian byte order
-  void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); }
+  void ToBytes(uint8_t* dest) const { std::memcpy(dest, &bits_, sizeof(bits_)); }
   /// \brief Return the value's bytes in native-endian byte order
   constexpr std::array<uint8_t, 2> ToBytes() const {
 #if ARROW_LITTLE_ENDIAN
@@ -108,32 +106,32 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Copy the value's bytes in little-endian byte order
   void ToLittleEndian(uint8_t* dest) const {
-    FromBits(::arrow::bit_util::ToLittleEndian(value_)).ToBytes(dest);
+    FromBits(::arrow::bit_util::ToLittleEndian(bits_)).ToBytes(dest);
   }
   /// \brief Return the value's bytes in little-endian byte order
   constexpr std::array<uint8_t, 2> ToLittleEndian() const {
 #if ARROW_LITTLE_ENDIAN
-    return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)};
+    return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)};
 #else
-    return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)};
+    return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)};  // low byte first on any host
 #endif
   }
 
   /// \brief Copy the value's bytes in big-endian byte order
   void ToBigEndian(uint8_t* dest) const {
-    FromBits(::arrow::bit_util::ToBigEndian(value_)).ToBytes(dest);
+    FromBits(::arrow::bit_util::ToBigEndian(bits_)).ToBytes(dest);
   }
   /// \brief Return the value's bytes in big-endian byte order
   constexpr std::array<uint8_t, 2> ToBigEndian() const {
 #if ARROW_LITTLE_ENDIAN
-    return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)};
+    return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)};
 #else
-    return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)};
+    return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)};  // high byte first on any host
 #endif
   }
 
-  constexpr Float16 operator-() const { return FromBits(value_ ^ 0x8000); }
-  constexpr Float16 operator+() const { return FromBits(value_); }
+  constexpr Float16 operator-() const { return FromBits(bits_ ^ 0x8000); }
+  constexpr Float16 operator+() const { return FromBits(bits_); }
 
   friend constexpr bool operator==(Float16 lhs, Float16 rhs) {
     if (lhs.is_nan() || rhs.is_nan()) return false;
@@ -156,10 +154,10 @@ class ARROW_EXPORT Float16 {
   ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg);
 
  protected:
-  uint16_t value_;
+  uint16_t bits_;
 
  private:
-  constexpr Float16(uint16_t value, bool) : value_(value) {}
+  constexpr Float16(uint16_t bits, bool) : bits_(bits) {}
 
   // Comparison helpers that assume neither operand is NaN
   static constexpr bool CompareEq(Float16 lhs, Float16 rhs) {
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
index dc8833d871baf..073375882e3c2 100644
--- a/cpp/src/arrow/util/float16_test.cc
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -210,6 +210,31 @@ TYPED_TEST(Float16ConversionTest, RoundTrip) { this->TestRoundTrip(); }
 TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN(); }
 TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); }
 
+TEST(Float16Test, ConstexprFunctions) {
+  constexpr Float16 neg_one = Float16::FromBits(0xbc00);  // -1.0
+  constexpr Float16 pos_one = Float16::FromBits(0x3c00);  // +1.0
+  // Accessors and classification predicates must work in constant expressions
+  static_assert(neg_one.bits() == 0xbc00);
+  static_assert(neg_one.signbit());
+  static_assert(!neg_one.is_nan());
+  static_assert(!neg_one.is_infinity());
+  static_assert(neg_one.is_finite());
+  static_assert(!neg_one.is_zero());
+  // ... and so must the comparison and sign operators
+  static_assert(!(neg_one == pos_one));
+  static_assert(neg_one != pos_one);
+  static_assert(neg_one < pos_one);
+  static_assert(!(neg_one > pos_one));
+  static_assert(neg_one <= pos_one);
+  static_assert(!(neg_one >= pos_one));
+  static_assert(-neg_one == +pos_one);
+  // With all bits set, the first byte is 0xff whatever the byte order
+  constexpr Float16 all_ones = Float16::FromBits(0xffff);
+  static_assert(all_ones.ToBytes()[0] == 0xff);
+  static_assert(all_ones.ToLittleEndian()[0] == 0xff);
+  static_assert(all_ones.ToBigEndian()[0] == 0xff);
+}
+
 TEST(Float16Test, Constructors) {
   // Construction from exact bits
   ASSERT_EQ(1, Float16::FromBits(1).bits());

From 157e0d7e1a07eb1a410cb7ad62069dce7f7c515a Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Fri, 20 Oct 2023 16:59:35 -0400
Subject: [PATCH 34/37] Add test for `ColumnIndex`/`BoundaryOrder`

---
 cpp/src/parquet/page_index_test.cc | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/cpp/src/parquet/page_index_test.cc b/cpp/src/parquet/page_index_test.cc
index 5bfe38522af7b..4db49b4267415 100644
--- a/cpp/src/parquet/page_index_test.cc
+++ b/cpp/src/parquet/page_index_test.cc
@@ -21,6 +21,7 @@
 #include <memory>
 
 #include "arrow/io/file.h"
+#include "arrow/util/float16.h"
 #include "parquet/file_reader.h"
 #include "parquet/metadata.h"
 #include "parquet/schema.h"
@@ -579,6 +580,27 @@ TEST(PageIndex, WriteFLBAColumnIndex) {
                             /*has_null_counts=*/false);
 }
 
+TEST(PageIndex, WriteFloat16ColumnIndex) {
+  using ::arrow::util::Float16;
+  // Serialize a number as the little-endian bytes of its Float16 conversion.
+  const auto to_le_string = [](auto number) -> std::string {
+    const auto le_bytes = Float16(number).ToLittleEndian();
+    return std::string(reinterpret_cast<const char*>(le_bytes.data()), le_bytes.size());
+  };
+  // Float16 (FLBA) page statistics in ascending order, without null counts.
+  std::vector<EncodedStatistics> stats(4);
+  stats.at(0).set_min(to_le_string(-1.3)).set_max(to_le_string(+3.6));
+  stats.at(1).set_min(to_le_string(-0.2)).set_max(to_le_string(+4.5));
+  stats.at(2).set_min(to_le_string(+1.1)).set_max(to_le_string(+5.4));
+  stats.at(3).set_min(to_le_string(+2.0)).set_max(to_le_string(+6.3));
+
+  auto float16_node = schema::PrimitiveNode::Make(
+      "c1", Repetition::OPTIONAL, LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY,
+      /*length=*/2);
+  TestWriteTypedColumnIndex(std::move(float16_node), stats, BoundaryOrder::Ascending,
+                            /*has_null_counts=*/false);
+}
+
 TEST(PageIndex, WriteColumnIndexWithAllNullPages) {
   // All values are null.
   std::vector<EncodedStatistics> page_stats(3);

From 5eb90d7e9cd9338265c2d91991047fe519034f15 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Tue, 14 Nov 2023 12:36:46 -0500
Subject: [PATCH 35/37] Tweak ToEndian methods

---
 cpp/src/arrow/util/float16.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
index 888936797c870..0a432fee2cd31 100644
--- a/cpp/src/arrow/util/float16.h
+++ b/cpp/src/arrow/util/float16.h
@@ -106,7 +106,8 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Copy the value's bytes in little-endian byte order
   void ToLittleEndian(uint8_t* dest) const {
-    FromBits(::arrow::bit_util::ToLittleEndian(bits_)).ToBytes(dest);
+    const auto bytes = ToLittleEndian();
+    std::memcpy(dest, bytes.data(), bytes.size());
   }
   /// \brief Return the value's bytes in little-endian byte order
   constexpr std::array<uint8_t, 2> ToLittleEndian() const {
@@ -119,7 +120,8 @@ class ARROW_EXPORT Float16 {
 
   /// \brief Copy the value's bytes in big-endian byte order
   void ToBigEndian(uint8_t* dest) const {
-    FromBits(::arrow::bit_util::ToBigEndian(bits_)).ToBytes(dest);
+    const auto bytes = ToBigEndian();
+    std::memcpy(dest, bytes.data(), bytes.size());
   }
   /// \brief Return the value's bytes in big-endian byte order
   constexpr std::array<uint8_t, 2> ToBigEndian() const {

From cb17f5675e6dd225afb7c036c2738ad655aa3773 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Tue, 14 Nov 2023 12:38:43 -0500
Subject: [PATCH 36/37] Relocate random Float16 function

---
 cpp/src/parquet/arrow/test_util.h | 23 +++++++----------------
 cpp/src/parquet/test_util.cc      | 10 ++++++++++
 cpp/src/parquet/test_util.h       |  4 ++++
 3 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h
index 6036f47514eb1..b2be1b3c5354d 100644
--- a/cpp/src/parquet/arrow/test_util.h
+++ b/cpp/src/parquet/arrow/test_util.h
@@ -35,6 +35,7 @@
 #include "arrow/util/decimal.h"
 #include "arrow/util/float16.h"
 #include "parquet/column_reader.h"
+#include "parquet/test_util.h"
 
 namespace parquet {
 
@@ -66,26 +67,15 @@ struct Decimal256WithPrecisionAndScale {
   static constexpr int32_t scale = PRECISION - 1;
 };
 
-inline void RandomHalfFloatValues(int64_t n, uint32_t seed,
-                                  ::arrow::util::Float16 min_value,
-                                  ::arrow::util::Float16 max_value,
-                                  std::vector<uint16_t>* out) {
-  std::vector<float> values;
-  ::arrow::random_real(n, seed, static_cast<float>(min_value),
-                       static_cast<float>(max_value), &values);
-  out->resize(values.size());
-  std::transform(values.begin(), values.end(), out->begin(),
-                 [](float f) { return ::arrow::util::Float16(f).bits(); });
-}
-
 template <class ArrowType>
 ::arrow::enable_if_floating_point<ArrowType, Status> NonNullArray(
     size_t size, std::shared_ptr<Array>* out) {
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
   if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
-    RandomHalfFloatValues(size, 0, ::arrow::util::Float16(0.0f),
-                          ::arrow::util::Float16(1.0f), &values);
+    values.resize(size);
+    test::random_float16_numbers(static_cast<int>(size), 0, ::arrow::util::Float16(0.0f),
+                                 ::arrow::util::Float16(1.0f), values.data());
   } else {
     ::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1),
                          &values);
@@ -221,8 +211,9 @@ ::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
   if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
-    RandomHalfFloatValues(size, seed, ::arrow::util::Float16(-1e4f),
-                          ::arrow::util::Float16(1e4f), &values);
+    values.resize(size);
+    test::random_float16_numbers(static_cast<int>(size), 0, ::arrow::util::Float16(-1e4f),
+                                 ::arrow::util::Float16(1e4f), values.data());
   } else {
     ::arrow::random_real(size, seed, static_cast<c_type>(-1e10),
                          static_cast<c_type>(1e10), &values);
diff --git a/cpp/src/parquet/test_util.cc b/cpp/src/parquet/test_util.cc
index b65945cc7329f..a6fa8afc0f5b3 100644
--- a/cpp/src/parquet/test_util.cc
+++ b/cpp/src/parquet/test_util.cc
@@ -101,6 +101,16 @@ void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_v
   }
 }
 
+void random_float16_numbers(int n, uint32_t seed, ::arrow::util::Float16 min_value,
+                            ::arrow::util::Float16 max_value, uint16_t* out) {
+  std::vector<float> samples(n);  // drawn as float, narrowed to Float16 bits below
+  random_numbers(n, seed, static_cast<float>(min_value), static_cast<float>(max_value),
+                 samples.data());
+  for (const float sample : samples) {
+    *out++ = ::arrow::util::Float16(sample).bits();
+  }
+}
+
 void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out) {
   std::default_random_engine gen(seed);
   std::uniform_int_distribution<int> d(0, 255);
diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h
index c8578609e9b1d..59728cf53f699 100644
--- a/cpp/src/parquet/test_util.h
+++ b/cpp/src/parquet/test_util.h
@@ -33,6 +33,7 @@
 
 #include "arrow/io/memory.h"
 #include "arrow/testing/util.h"
+#include "arrow/util/float16.h"
 
 #include "parquet/column_page.h"
 #include "parquet/column_reader.h"
@@ -148,6 +149,9 @@ inline void random_numbers(int n, uint32_t seed, double min_value, double max_va
 void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value,
                           Int96* out);
 
+void random_float16_numbers(int n, uint32_t seed, ::arrow::util::Float16 min_value,
+                            ::arrow::util::Float16 max_value, uint16_t* out);
+
 void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out);
 
 void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size,

From 36b8a3b88792bebb6f561f5754e8d4f48f9160c5 Mon Sep 17 00:00:00 2001
From: benibus <bpharks@gmx.com>
Date: Tue, 14 Nov 2023 15:25:34 -0500
Subject: [PATCH 37/37] Add missing schema tests

---
 cpp/src/parquet/schema_test.cc | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc
index 603d9ed8e2124..a1b5557497d9c 100644
--- a/cpp/src/parquet/schema_test.cc
+++ b/cpp/src/parquet/schema_test.cc
@@ -1147,6 +1147,9 @@ TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) {
   auto check_is_UUID = [](const std::shared_ptr<const LogicalType>& logical_type) {
     return logical_type->is_UUID();
   };
+  auto check_is_float16 = [](const std::shared_ptr<const LogicalType>& logical_type) {
+    return logical_type->is_float16();
+  };
   auto check_is_null = [](const std::shared_ptr<const LogicalType>& logical_type) {
     return logical_type->is_null();
   };
@@ -1159,6 +1162,7 @@ TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) {
 
   std::vector<ConfirmNewTypeIncompatibilityArguments> cases = {
       {LogicalType::UUID(), check_is_UUID},
+      {LogicalType::Float16(), check_is_float16},
       {LogicalType::Null(), check_is_null},
       {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), check_is_time},
       {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), check_is_time},
@@ -1242,6 +1246,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeProperties) {
       {JSONLogicalType::Make(), false, true, true},
       {BSONLogicalType::Make(), false, true, true},
       {UUIDLogicalType::Make(), false, true, true},
+      {Float16LogicalType::Make(), false, true, true},
       {NoLogicalType::Make(), false, false, true},
   };
 
@@ -1351,7 +1356,8 @@ TEST(TestLogicalTypeOperation, LogicalTypeApplicability) {
     int physical_length;
   };
 
-  std::vector<InapplicableType> inapplicable_types = {{Type::FIXED_LEN_BYTE_ARRAY, 8},
+  std::vector<InapplicableType> inapplicable_types = {{Type::FIXED_LEN_BYTE_ARRAY, 1},
+                                                      {Type::FIXED_LEN_BYTE_ARRAY, 8},
                                                       {Type::FIXED_LEN_BYTE_ARRAY, 20},
                                                       {Type::BOOLEAN, -1},
                                                       {Type::INT32, -1},
@@ -1374,6 +1380,12 @@ TEST(TestLogicalTypeOperation, LogicalTypeApplicability) {
   for (const InapplicableType& t : inapplicable_types) {
     ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length));
   }
+
+  logical_type = LogicalType::Float16();
+  ASSERT_TRUE(logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 2));
+  for (const InapplicableType& t : inapplicable_types) {
+    ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length));
+  }
 }
 
 TEST(TestLogicalTypeOperation, DecimalLogicalTypeApplicability) {
@@ -1531,6 +1543,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) {
       {LogicalType::JSON(), "JSON", R"({"Type": "JSON"})"},
       {LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"},
       {LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"},
+      {LogicalType::Float16(), "Float16", R"({"Type": "Float16"})"},
       {LogicalType::None(), "None", R"({"Type": "None"})"},
   };
 
@@ -1580,6 +1593,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) {
       {LogicalType::JSON(), SortOrder::UNSIGNED},
       {LogicalType::BSON(), SortOrder::UNSIGNED},
       {LogicalType::UUID(), SortOrder::UNSIGNED},
+      {LogicalType::Float16(), SortOrder::SIGNED},
       {LogicalType::None(), SortOrder::UNKNOWN}};
 
   for (const ExpectedSortOrder& c : cases) {
@@ -1712,6 +1726,15 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) {
   ASSERT_ANY_THROW(PrimitiveNode::Make("uuid", Repetition::REQUIRED,
                                        UUIDLogicalType::Make(),
                                        Type::FIXED_LEN_BYTE_ARRAY, 64));
+
+  // Incompatible primitive type ...
+  ASSERT_ANY_THROW(PrimitiveNode::Make("float16", Repetition::REQUIRED,
+                                       Float16LogicalType::Make(), Type::BYTE_ARRAY, 2));
+  // Incompatible primitive length ...
+  ASSERT_ANY_THROW(PrimitiveNode::Make("float16", Repetition::REQUIRED,
+                                       Float16LogicalType::Make(),
+                                       Type::FIXED_LEN_BYTE_ARRAY, 3));
+
   // Non-positive length argument for fixed length binary ...
   ASSERT_ANY_THROW(PrimitiveNode::Make("negative_length", Repetition::REQUIRED,
                                        NoLogicalType::Make(), Type::FIXED_LEN_BYTE_ARRAY,
@@ -1902,6 +1925,9 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) {
        [this]() { return element_->logicalType.__isset.BSON; }},
       {"uuid", LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16, false,
        ConvertedType::NA, true, [this]() { return element_->logicalType.__isset.UUID; }},
+      {"float16", LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2, false,
+       ConvertedType::NA, true,
+       [this]() { return element_->logicalType.__isset.FLOAT16; }},
       {"none", LogicalType::None(), Type::INT64, -1, false, ConvertedType::NA, false,
        check_nothing}};
 
@@ -2238,6 +2264,7 @@ TEST(TestLogicalTypeSerialization, Roundtrips) {
       {LogicalType::JSON(), Type::BYTE_ARRAY, -1},
       {LogicalType::BSON(), Type::BYTE_ARRAY, -1},
       {LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16},
+      {LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2},
       {LogicalType::None(), Type::BOOLEAN, -1}};
 
   for (const AnnotatedPrimitiveNodeFactoryArguments& c : cases) {