From 3ff1473daa080867889f17e35562b6cfe11d8912 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 6 Oct 2023 14:18:31 +0200 Subject: [PATCH] revert: reduce guarantees on signed zero/nan --- .../src/compute/arithmetics/basic/div.rs | 23 +++++++------------ .../src/chunked_array/arithmetic/numeric.rs | 13 +++-------- docs/user-guide/concepts/data-types.md | 15 +++++------- 3 files changed, 17 insertions(+), 34 deletions(-) diff --git a/crates/nano-arrow/src/compute/arithmetics/basic/div.rs b/crates/nano-arrow/src/compute/arithmetics/basic/div.rs index e14d34b57949..a5137ca9a0cc 100644 --- a/crates/nano-arrow/src/compute/arithmetics/basic/div.rs +++ b/crates/nano-arrow/src/compute/arithmetics/basic/div.rs @@ -1,5 +1,5 @@ //! Definition of basic div operations with primitive arrays -use std::ops::{Add, Div}; +use std::ops::Div; use num_traits::{CheckedDiv, NumCast}; use strength_reduce::{ @@ -29,18 +29,14 @@ use crate::datatypes::PrimitiveType; /// ``` pub fn div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray where - T: NativeArithmetics + Add + Div, + T: NativeArithmetics + Div, { - // Adding zero to divisor ensures x/0 becomes +infinity, ignoring - // the sign of the zero. if rhs.null_count() == 0 { - binary(lhs, rhs, lhs.data_type().clone(), |a, b| { - a / (b + T::zeroed()) - }) + binary(lhs, rhs, lhs.data_type().clone(), |a, b| a / b) } else { check_same_len(lhs, rhs).unwrap(); let values = lhs.iter().zip(rhs.iter()).map(|(l, r)| match (l, r) { - (Some(l), Some(r)) => Some(*l / (*r + T::zeroed())), + (Some(l), Some(r)) => Some(*l / *r), _ => None, }); @@ -65,19 +61,16 @@ where /// ``` pub fn checked_div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray where - T: NativeArithmetics + Add + CheckedDiv, + T: NativeArithmetics + CheckedDiv, { - // Adding zero to divisor ensures x/0 becomes +infinity, ignoring - // the sign of the zero. - let op = move |a: T, b: T| a.checked_div(&(b + T::zeroed())); - + let op = move |a: T, b: T| a.checked_div(&b); binary_checked(lhs, rhs, lhs.data_type().clone(), op) } // Implementation of ArrayDiv trait for PrimitiveArrays impl ArrayDiv> for PrimitiveArray where - T: NativeArithmetics + Add + Div, + T: NativeArithmetics + Div, { fn div(&self, rhs: &PrimitiveArray) -> Self { div(self, rhs) @@ -87,7 +80,7 @@ where // Implementation of ArrayCheckedDiv trait for PrimitiveArrays impl ArrayCheckedDiv> for PrimitiveArray where - T: NativeArithmetics + Add + CheckedDiv, + T: NativeArithmetics + CheckedDiv, { fn checked_div(&self, rhs: &PrimitiveArray) -> Self { checked_div(self, rhs) diff --git a/crates/polars-core/src/chunked_array/arithmetic/numeric.rs b/crates/polars-core/src/chunked_array/arithmetic/numeric.rs index 9d125501b0b9..940563f50c23 100644 --- a/crates/polars-core/src/chunked_array/arithmetic/numeric.rs +++ b/crates/polars-core/src/chunked_array/arithmetic/numeric.rs @@ -114,9 +114,7 @@ where self, rhs, ::div, - // Adding zero to divisor ensures x/0 becomes +infinity, ignoring - // the sign of the zero. - |lhs, rhs| lhs / (rhs + T::Native::zero()), + |lhs, rhs| lhs / rhs, ) } } @@ -196,9 +194,7 @@ where self, rhs, |a, b| arity_assign::binary(a, b, |a, b| a / b), - // Adding zero to divisor ensures x/0 becomes +infinity, ignoring - // the sign of the zero. - |lhs, rhs| lhs / (rhs + T::Native::zero()), + |lhs, rhs| lhs / rhs, ) } } @@ -360,10 +356,7 @@ where type Output = ChunkedArray; fn div(self, rhs: N) -> Self::Output { - // Adding zero to divisor ensures x/0 becomes +infinity, ignoring - // the sign of the zero. - #[allow(clippy::suspicious_arithmetic_impl)] - (&self).div(rhs + N::zero()) + (&self).div(rhs) } } diff --git a/docs/user-guide/concepts/data-types.md b/docs/user-guide/concepts/data-types.md index 32e12acc4e0b..77bed2d51024 100644 --- a/docs/user-guide/concepts/data-types.md +++ b/docs/user-guide/concepts/data-types.md @@ -34,15 +34,12 @@ To learn more about the internal representation of these data types, check the [ `Polars` generally follows the IEEE 754 floating point standard for `Float32` and `Float64`, with some exceptions: -- `Polars` does not support signed zero and conceptually only has a single zero value. -- `Polars` has a single canonical NaN representation and does not support signed NaNs or NaN payloads. -- The canonical NaN compares equal to itself, and greater than any non-NaN value. +- Any NaN compares equal to any other NaN, and greater than any non-NaN value. +- Operations do not guarantee any particular behavior on the sign of zero or NaN, + nor on the payload of NaN values. This is not just limited to arithmetic operations, + e.g. a sort or group by operation may canonicalize all zeroes to +0 and all NaNs + to a positive NaN without payload for efficient equality checks. -The above canonicalization should be consistently applied for user-facing data, arithmetic -and file-format I/O, but may be violated for zero-copy in-memory export. In such -cases `Polars` simply provides no guarantees around which sign or payload NaNs/zeros -have. - -Finally, `Polars` always attempts to provide reasonably accurate results for floating point computations, but does not provide guarantees +`Polars` always attempts to provide reasonably accurate results for floating point computations, but does not provide guarantees on the error unless mentioned otherwise. Generally speaking 100% accurate results are infeasibly expensive to acquire (requiring much larger internal representations than 64-bit floats), and thus some error is always to be expected.