Skip to content

Commit

Permalink
Support multiple endians in Float16 class
Browse files Browse the repository at this point in the history
  • Loading branch information
benibus committed Jun 18, 2023
1 parent c0c6269 commit 899ac72
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 25 deletions.
37 changes: 32 additions & 5 deletions cpp/src/arrow/util/float16.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace util {
///
/// NOTE: Methods in the class should not mutate the underlying value or produce copies.
/// Such functionality is delegated to subclasses.
class ARROW_EXPORT Float16Base {
class Float16Base {
public:
Float16Base() = default;
constexpr explicit Float16Base(uint16_t value) : value_(value) {}
Expand All @@ -56,13 +56,32 @@ class ARROW_EXPORT Float16Base {
/// \brief Whether the stored bits equal 0x7c00 once the sign bit is ignored
constexpr bool is_infinity() const {
  // 0x7fff keeps every bit except the top (sign) bit
  const auto magnitude_bits = value_ & 0x7fff;
  return magnitude_bits == 0x7c00;
}
/// \brief Whether all bits other than the sign bit are zero (matches both +0 and -0)
constexpr bool is_zero() const {
  // 0x7fff keeps every bit except the top (sign) bit
  const auto magnitude_bits = value_ & 0x7fff;
  return magnitude_bits == 0;
}

void ToBytes(uint8_t* dest) const {
/// \brief Write the value's bytes to `dest` in native-endian byte order
///
/// Copies `sizeof(value_)` bytes directly from the stored value, without byte swapping.
void ToBytes(uint8_t* dest) const {
  std::memcpy(dest, &value_, sizeof(value_));
}
/// \brief Return the value's bytes as a two-element array in native-endian byte order
std::array<uint8_t, 2> ToBytes() const {
  std::array<uint8_t, 2> native_bytes;
  // Delegate to the pointer overload so both forms share one implementation
  ToBytes(native_bytes.data());
  return native_bytes;
}

void ToLittleEndian(uint8_t* dest) const {
auto value = bit_util::ToLittleEndian(value_);
std::memcpy(dest, &value, sizeof(value));
}
std::array<uint8_t, 2> ToBytes() const {
std::array<uint8_t, 2> ToLittleEndian() const {
std::array<uint8_t, 2> bytes;
ToBytes(bytes.data());
ToLittleEndian(bytes.data());
return bytes;
}

void ToBigEndian(uint8_t* dest) const {
auto value = bit_util::ToBigEndian(value_);
std::memcpy(dest, &value, sizeof(value));
}
/// \brief Return the value's bytes as a two-element array in big-endian byte order
std::array<uint8_t, 2> ToBigEndian() const {
  std::array<uint8_t, 2> be_bytes;
  // Delegate to the pointer overload so both forms share one implementation
  ToBigEndian(be_bytes.data());
  return be_bytes;
}

Expand Down Expand Up @@ -120,16 +139,24 @@ class ARROW_EXPORT Float16Base {
};

/// \brief Wrapper class for an IEEE half-precision float, encoded as a `uint16_t`
class ARROW_EXPORT Float16 : public Float16Base {
class Float16 : public Float16Base {
public:
using Float16Base::Float16Base;

/// \brief Negation: return a copy whose top (sign) bit is flipped
constexpr Float16 operator-() const {
  // XOR with 0x8000 toggles only the highest bit and leaves the rest untouched
  return Float16(value_ ^ 0x8000);
}
/// \brief Unary plus: return a copy carrying the same bits
constexpr Float16 operator+() const {
  return Float16(value_);
}

/// \brief Build a `Float16` from the bytes at `src`, read in native-endian byte order
static Float16 FromBytes(const uint8_t* src) {
  // Load a uint16_t from `src` and use it as-is, with no byte swapping
  const auto bits = SafeLoadAs<uint16_t>(src);
  return Float16(bits);
}

/// \brief Build a `Float16` from the bytes at `src`, read in little-endian byte order
static Float16 FromLittleEndian(const uint8_t* src) {
  // Load the raw uint16_t, then convert it from little-endian before wrapping it
  const auto raw_bits = SafeLoadAs<uint16_t>(src);
  return Float16(bit_util::FromLittleEndian(raw_bits));
}
/// \brief Build a `Float16` from the bytes at `src`, read in big-endian byte order
static Float16 FromBigEndian(const uint8_t* src) {
  // Load the raw uint16_t, then convert it from big-endian before wrapping it
  const auto raw_bits = SafeLoadAs<uint16_t>(src);
  return Float16(bit_util::FromBigEndian(raw_bits));
}
};

static_assert(std::is_trivial_v<Float16>);
Expand Down
33 changes: 33 additions & 0 deletions cpp/src/arrow/util/float16_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
// specific language governing permissions and limitations
// under the License.

#include <array>
#include <utility>
#include <vector>

#include <gtest/gtest.h>

#include "arrow/testing/gtest_util.h"
#include "arrow/util/endian.h"
#include "arrow/util/float16.h"
#include "arrow/util/ubsan.h"

namespace arrow {
namespace util {
Expand Down Expand Up @@ -130,6 +133,36 @@ TYPED_TEST_SUITE(Float16OperatorTest, OperatorTypes);

TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); }

// Serializes one value with each of the three writers and checks the result by
// reloading the produced bytes as a native-endian uint16_t.
TEST(Float16Test, ToBytes) {
  constexpr uint16_t kBits = 0xd01c;
  constexpr uint16_t kSwappedBits = 0x1cd0;
  // A native reload round-trips for the host's own byte order and appears
  // byte-swapped for the opposite one.
#if ARROW_LITTLE_ENDIAN
  constexpr uint16_t kLittleReloaded = kBits;
  constexpr uint16_t kBigReloaded = kSwappedBits;
#else
  constexpr uint16_t kLittleReloaded = kSwappedBits;
  constexpr uint16_t kBigReloaded = kBits;
#endif
  constexpr auto f16 = Float16(kBits);

  const auto native_bytes = f16.ToBytes();
  ASSERT_EQ(SafeLoadAs<uint16_t>(native_bytes.data()), kBits);

  const auto little_bytes = f16.ToLittleEndian();
  ASSERT_EQ(SafeLoadAs<uint16_t>(little_bytes.data()), kLittleReloaded);

  const auto big_bytes = f16.ToBigEndian();
  ASSERT_EQ(SafeLoadAs<uint16_t>(big_bytes.data()), kBigReloaded);
}

// Reads the in-memory bytes of one uint16_t with each of the three readers and
// checks the resulting Float16 against the expected bit pattern.
TEST(Float16Test, FromBytes) {
  constexpr uint16_t kBits = 0xd01c;
  constexpr uint16_t kSwappedBits = 0x1cd0;
  // The source bytes are in host order, so only the reader matching the host's
  // byte order reproduces kBits; the other one yields the swapped pattern.
#if ARROW_LITTLE_ENDIAN
  constexpr uint16_t kFromLittle = kBits;
  constexpr uint16_t kFromBig = kSwappedBits;
#else
  constexpr uint16_t kFromLittle = kSwappedBits;
  constexpr uint16_t kFromBig = kBits;
#endif
  const auto* src = reinterpret_cast<const uint8_t*>(&kBits);

  ASSERT_EQ(Float16::FromBytes(src), Float16(kBits));
  ASSERT_EQ(Float16::FromLittleEndian(src), Float16(kFromLittle));
  ASSERT_EQ(Float16::FromBigEndian(src), Float16(kFromBig));
}

} // namespace
} // namespace util
} // namespace arrow
19 changes: 10 additions & 9 deletions cpp/src/parquet/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,19 @@ constexpr int value_length(int type_length, const FLBA& value) { return type_len
// Static "constants" for normalizing float16 min/max values. These need to be expressed
// as pointers because `Float16LogicalType` represents an FLBA.
const uint8_t* float16_lowest() {
static const auto bytes = std::numeric_limits<Float16>::lowest().ToBytes();
static const auto bytes = std::numeric_limits<Float16>::lowest().ToLittleEndian();
return bytes.data();
}
const uint8_t* float16_max() {
static const auto bytes = std::numeric_limits<Float16>::max().ToBytes();
static const auto bytes = std::numeric_limits<Float16>::max().ToLittleEndian();
return bytes.data();
}
const uint8_t* float16_positive_zero() {
static const auto bytes = Float16(0).ToBytes();
static const auto bytes = Float16(0).ToLittleEndian();
return bytes.data();
}
const uint8_t* float16_negative_zero() {
static const auto bytes = (-Float16(0)).ToBytes();
static const auto bytes = (-Float16(0)).ToLittleEndian();
return bytes.data();
}

Expand Down Expand Up @@ -305,12 +305,13 @@ struct Float16CompareHelper {
static T DefaultMax() { return T{float16_lowest()}; }

static T Coalesce(T val, T fallback) {
return val.ptr != nullptr && Float16::FromBytes(val.ptr).is_nan() ? fallback : val;
return val.ptr != nullptr && Float16::FromLittleEndian(val.ptr).is_nan() ? fallback
: val;
}

static inline bool Compare(int type_length, const T& a, const T& b) {
const auto lhs = Float16::FromBytes(a.ptr);
const auto rhs = Float16::FromBytes(b.ptr);
const auto lhs = Float16::FromLittleEndian(a.ptr);
const auto rhs = Float16::FromLittleEndian(b.ptr);
// NaN is handled here (same behavior as native float compare)
return lhs < rhs;
}
Expand Down Expand Up @@ -372,8 +373,8 @@ CleanStatistic(std::pair<T, T> min_max, LogicalType::Type::type) {
optional<std::pair<FLBA, FLBA>> CleanFloat16Statistic(std::pair<FLBA, FLBA> min_max) {
FLBA min_flba = min_max.first;
FLBA max_flba = min_max.second;
Float16 min = Float16::FromBytes(min_flba.ptr);
Float16 max = Float16::FromBytes(max_flba.ptr);
Float16 min = Float16::FromLittleEndian(min_flba.ptr);
Float16 max = Float16::FromLittleEndian(max_flba.ptr);

if (min.is_nan() || max.is_nan()) {
return ::std::nullopt;
Expand Down
18 changes: 7 additions & 11 deletions cpp/src/parquet/statistics_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class BufferedFloat16 : public ::arrow::util::Float16Base {
public:
explicit BufferedFloat16(Float16 f16) : Float16Base(f16) {
buffer_ = *::arrow::AllocateBuffer(sizeof(value_));
ToBytes(buffer_->mutable_data());
ToLittleEndian(buffer_->mutable_data());
}
explicit BufferedFloat16(uint16_t value) : BufferedFloat16(Float16(value)) {}

Expand All @@ -77,10 +77,6 @@ class BufferedFloat16 : public ::arrow::util::Float16Base {
BufferedFloat16 operator+() const { return *this; }
BufferedFloat16 operator-() const { return BufferedFloat16(value_ ^ 0x8000); }

static BufferedFloat16 FromBytes(const uint8_t* src) {
return BufferedFloat16(Float16::FromBytes(src));
}

private:
std::shared_ptr<::arrow::Buffer> buffer_;
};
Expand Down Expand Up @@ -973,7 +969,7 @@ void TestStatisticsSortOrder<Float16LogicalType>::SetValues() {
values_buf_.resize(kNumBytes);
uint8_t* ptr = values_buf_.data();
for (int i = 0; i < NUM_VALUES; ++i) {
Float16(u16_vals[i]).ToBytes(ptr);
Float16(u16_vals[i]).ToLittleEndian(ptr);
values_[i].ptr = ptr;
ptr += kValueLen;
}
Expand Down Expand Up @@ -1259,9 +1255,9 @@ void TestFloatStatistics<T>::Init() {
template <>
void TestFloatStatistics<Float16LogicalType>::Init() {
data_buf_.resize(4);
(+Float16(0)).ToBytes(&data_buf_[0]);
(+Float16(0)).ToLittleEndian(&data_buf_[0]);
positive_zero_ = FLBA{&data_buf_[0]};
(-Float16(0)).ToBytes(&data_buf_[2]);
(-Float16(0)).ToLittleEndian(&data_buf_[2]);
negative_zero_ = FLBA{&data_buf_[2]};
}

Expand All @@ -1282,8 +1278,8 @@ void TestFloatStatistics<T>::CheckEq(const c_type& l, const c_type& r) {
}
template <>
void TestFloatStatistics<Float16LogicalType>::CheckEq(const c_type& a, const c_type& b) {
auto l = Float16::FromBytes(a.ptr);
auto r = Float16::FromBytes(b.ptr);
auto l = Float16::FromLittleEndian(a.ptr);
auto r = Float16::FromLittleEndian(b.ptr);
ASSERT_EQ(l, r);
}

Expand All @@ -1293,7 +1289,7 @@ bool TestFloatStatistics<T>::signbit(c_type val) {
}
template <>
bool TestFloatStatistics<Float16LogicalType>::signbit(c_type val) {
return Float16::FromBytes(val.ptr).signbit();
return Float16::FromLittleEndian(val.ptr).signbit();
}

template <typename T>
Expand Down

0 comments on commit 899ac72

Please sign in to comment.