From 899ac7265746814d3d577f1c0cc1e431db0819db Mon Sep 17 00:00:00 2001 From: benibus Date: Sat, 17 Jun 2023 21:55:15 -0400 Subject: [PATCH] Support multiple endians in Float16 class --- cpp/src/arrow/util/float16.h | 37 ++++++++++++++++++++++++++---- cpp/src/arrow/util/float16_test.cc | 33 ++++++++++++++++++++++++++ cpp/src/parquet/statistics.cc | 19 +++++++-------- cpp/src/parquet/statistics_test.cc | 18 ++++++--------- 4 files changed, 82 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index fedba0c29dc75..f2db88f3d3ea2 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -40,7 +40,7 @@ namespace util { /// /// NOTE: Methods in the class should not mutate the unerlying value or produce copies. /// Such functionality is delegated to subclasses. -class ARROW_EXPORT Float16Base { +class Float16Base { public: Float16Base() = default; constexpr explicit Float16Base(uint16_t value) : value_(value) {} @@ -56,13 +56,32 @@ class ARROW_EXPORT Float16Base { constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; } constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } - void ToBytes(uint8_t* dest) const { + /// \brief Copy the value's bytes in native-endian byte order + void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); } + /// \brief Return the value's bytes in native-endian byte order + std::array ToBytes() const { + std::array bytes; + ToBytes(bytes.data()); + return bytes; + } + + void ToLittleEndian(uint8_t* dest) const { auto value = bit_util::ToLittleEndian(value_); std::memcpy(dest, &value, sizeof(value)); } - std::array ToBytes() const { + std::array ToLittleEndian() const { std::array bytes; - ToBytes(bytes.data()); + ToLittleEndian(bytes.data()); + return bytes; + } + + void ToBigEndian(uint8_t* dest) const { + auto value = bit_util::ToBigEndian(value_); + std::memcpy(dest, &value, sizeof(value)); + } + std::array ToBigEndian() const { + std::array bytes; + ToBigEndian(bytes.data()); return bytes; } @@ -120,16 +139,24 @@ class ARROW_EXPORT Float16Base { }; /// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t` -class ARROW_EXPORT Float16 : public Float16Base { +class Float16 : public Float16Base { public: using Float16Base::Float16Base; constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } constexpr Float16 operator+() const { return Float16(value_); } + /// \brief Read a `Float16` from memory in native-endian byte order static Float16 FromBytes(const uint8_t* src) { + return Float16(SafeLoadAs(src)); + } + + static Float16 FromLittleEndian(const uint8_t* src) { return Float16(bit_util::FromLittleEndian(SafeLoadAs(src))); } + static Float16 FromBigEndian(const uint8_t* src) { + return Float16(bit_util::FromBigEndian(SafeLoadAs(src))); + } }; static_assert(std::is_trivial_v); diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc index 75ee9dc816b97..4e6bc64d5b6a6 100644 --- a/cpp/src/arrow/util/float16_test.cc +++ b/cpp/src/arrow/util/float16_test.cc @@ -15,13 +15,16 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include #include "arrow/testing/gtest_util.h" +#include "arrow/util/endian.h" #include "arrow/util/float16.h" +#include "arrow/util/ubsan.h" namespace arrow { namespace util { @@ -130,6 +133,36 @@ TYPED_TEST_SUITE(Float16OperatorTest, OperatorTypes); TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); } +TEST(Float16Test, ToBytes) { + constexpr auto f16 = Float16(0xd01c); + auto bytes = f16.ToBytes(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); +#if ARROW_LITTLE_ENDIAN + bytes = f16.ToLittleEndian(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); + bytes = f16.ToBigEndian(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0x1cd0); +#else + bytes = f16.ToLittleEndian(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0x1cd0); + bytes = f16.ToBigEndian(); + ASSERT_EQ(SafeLoadAs(bytes.data()), 0xd01c); +#endif +} + +TEST(Float16Test, FromBytes) { + constexpr uint16_t u16 = 0xd01c; + const auto* data = reinterpret_cast(&u16); + ASSERT_EQ(Float16::FromBytes(data), Float16(0xd01c)); +#if ARROW_LITTLE_ENDIAN + ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0xd01c)); + ASSERT_EQ(Float16::FromBigEndian(data), Float16(0x1cd0)); +#else + ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0x1cd0)); + ASSERT_EQ(Float16::FromBigEndian(data), Float16(0xd01c)); +#endif +} + } // namespace } // namespace util } // namespace arrow diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index a72886fb58bfc..e70668e6c393a 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -58,19 +58,19 @@ constexpr int value_length(int type_length, const FLBA& value) { return type_len // Static "constants" for normalizing float16 min/max values. These need to be expressed // as pointers because `Float16LogicalType` represents an FLBA. const uint8_t* float16_lowest() { - static const auto bytes = std::numeric_limits::lowest().ToBytes(); + static const auto bytes = std::numeric_limits::lowest().ToLittleEndian(); return bytes.data(); } const uint8_t* float16_max() { - static const auto bytes = std::numeric_limits::max().ToBytes(); + static const auto bytes = std::numeric_limits::max().ToLittleEndian(); return bytes.data(); } const uint8_t* float16_positive_zero() { - static const auto bytes = Float16(0).ToBytes(); + static const auto bytes = Float16(0).ToLittleEndian(); return bytes.data(); } const uint8_t* float16_negative_zero() { - static const auto bytes = (-Float16(0)).ToBytes(); + static const auto bytes = (-Float16(0)).ToLittleEndian(); return bytes.data(); } @@ -305,12 +305,13 @@ struct Float16CompareHelper { static T DefaultMax() { return T{float16_lowest()}; } static T Coalesce(T val, T fallback) { - return val.ptr != nullptr && Float16::FromBytes(val.ptr).is_nan() ? fallback : val; + return val.ptr != nullptr && Float16::FromLittleEndian(val.ptr).is_nan() ? fallback + : val; } static inline bool Compare(int type_length, const T& a, const T& b) { - const auto lhs = Float16::FromBytes(a.ptr); - const auto rhs = Float16::FromBytes(b.ptr); + const auto lhs = Float16::FromLittleEndian(a.ptr); + const auto rhs = Float16::FromLittleEndian(b.ptr); // NaN is handled here (same behavior as native float compare) return lhs < rhs; } @@ -372,8 +373,8 @@ CleanStatistic(std::pair min_max, LogicalType::Type::type) { optional> CleanFloat16Statistic(std::pair min_max) { FLBA min_flba = min_max.first; FLBA max_flba = min_max.second; - Float16 min = Float16::FromBytes(min_flba.ptr); - Float16 max = Float16::FromBytes(max_flba.ptr); + Float16 min = Float16::FromLittleEndian(min_flba.ptr); + Float16 max = Float16::FromLittleEndian(max_flba.ptr); if (min.is_nan() || max.is_nan()) { return ::std::nullopt; diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index 4aaa29887500d..820540d5614cc 100644 --- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -67,7 +67,7 @@ class BufferedFloat16 : public ::arrow::util::Float16Base { public: explicit BufferedFloat16(Float16 f16) : Float16Base(f16) { buffer_ = *::arrow::AllocateBuffer(sizeof(value_)); - ToBytes(buffer_->mutable_data()); + ToLittleEndian(buffer_->mutable_data()); } explicit BufferedFloat16(uint16_t value) : BufferedFloat16(Float16(value)) {} @@ -77,10 +77,6 @@ class BufferedFloat16 : public ::arrow::util::Float16Base { BufferedFloat16 operator+() const { return *this; } BufferedFloat16 operator-() const { return BufferedFloat16(value_ ^ 0x8000); } - static BufferedFloat16 FromBytes(const uint8_t* src) { - return BufferedFloat16(Float16::FromBytes(src)); - } - private: std::shared_ptr<::arrow::Buffer> buffer_; }; @@ -973,7 +969,7 @@ void TestStatisticsSortOrder::SetValues() { values_buf_.resize(kNumBytes); uint8_t* ptr = values_buf_.data(); for (int i = 0; i < NUM_VALUES; ++i) { - Float16(u16_vals[i]).ToBytes(ptr); + Float16(u16_vals[i]).ToLittleEndian(ptr); values_[i].ptr = ptr; ptr += kValueLen; } @@ -1259,9 +1255,9 @@ void TestFloatStatistics::Init() { template <> void TestFloatStatistics::Init() { data_buf_.resize(4); - (+Float16(0)).ToBytes(&data_buf_[0]); + (+Float16(0)).ToLittleEndian(&data_buf_[0]); positive_zero_ = FLBA{&data_buf_[0]}; - (-Float16(0)).ToBytes(&data_buf_[2]); + (-Float16(0)).ToLittleEndian(&data_buf_[2]); negative_zero_ = FLBA{&data_buf_[2]}; } @@ -1282,8 +1278,8 @@ void TestFloatStatistics::CheckEq(const c_type& l, const c_type& r) { } template <> void TestFloatStatistics::CheckEq(const c_type& a, const c_type& b) { - auto l = Float16::FromBytes(a.ptr); - auto r = Float16::FromBytes(b.ptr); + auto l = Float16::FromLittleEndian(a.ptr); + auto r = Float16::FromLittleEndian(b.ptr); ASSERT_EQ(l, r); } @@ -1293,7 +1289,7 @@ bool TestFloatStatistics::signbit(c_type val) { } template <> bool TestFloatStatistics::signbit(c_type val) { - return Float16::FromBytes(val.ptr).signbit(); + return Float16::FromLittleEndian(val.ptr).signbit(); } template