Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc][math] Implement cbrtf function correctly rounded to all rounding modes. #97936

Merged
merged 2 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/darwin/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrtf
libc.src.math.copysign
libc.src.math.copysignf
libc.src.math.copysignl
Expand Down
1 change: 1 addition & 0 deletions libc/config/gpu/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.atanf
libc.src.math.atanh
libc.src.math.atanhf
libc.src.math.cbrtf
libc.src.math.ceil
libc.src.math.ceilf
libc.src.math.copysign
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrtf
libc.src.math.ceil
libc.src.math.ceilf
libc.src.math.ceill
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrtf
libc.src.math.ceil
libc.src.math.ceilf
libc.src.math.ceill
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrtf
libc.src.math.ceil
libc.src.math.ceilf
libc.src.math.ceill
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.canonicalize
libc.src.math.canonicalizef
libc.src.math.canonicalizel
libc.src.math.cbrtf
libc.src.math.ceil
libc.src.math.ceilf
libc.src.math.ceill
Expand Down
1 change: 1 addition & 0 deletions libc/config/windows/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.cbrtf
libc.src.math.copysign
libc.src.math.copysignf
libc.src.math.copysignl
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| atanpi | | | | | | 7.12.4.10 | F.10.1.10 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| cbrt | | | | | | 7.12.7.1 | F.10.4.1 |
| cbrt | |check| | | | | | 7.12.7.1 | F.10.4.1 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| compoundn | | | | | | 7.12.7.2 | F.10.4.2 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
Expand Down
2 changes: 2 additions & 0 deletions libc/spec/stdc.td
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,8 @@ def StdC : StandardSpec<"stdc"> {
],
[], // Enumerations
[
FunctionSpec<"cbrtf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

FunctionSpec<"copysign", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"copysignf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"copysignl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
Expand Down
2 changes: 2 additions & 0 deletions libc/src/__support/FPUtil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ add_header_library(
multiply_add.h
DEPENDS
libc.src.__support.common
FLAGS
FMA_OPT
)

add_header_library(
Expand Down
6 changes: 6 additions & 0 deletions libc/src/__support/FPUtil/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ LIBC_INLINE int set_env(const fenv_t *) { return 0; }

namespace LIBC_NAMESPACE::fputil {

// Clears the floating-point exception flags in `excepts`, but only when
// math_errhandling has MATH_ERREXCEPT set. Returns the result of
// clear_except() in that case, and 0 without touching any flag otherwise.
LIBC_INLINE int clear_except_if_required(int excepts) {
  const bool reports_via_exceptions =
      (math_errhandling & MATH_ERREXCEPT) != 0;
  return reports_via_exceptions ? clear_except(excepts) : 0;
}

LIBC_INLINE int set_except_if_required(int excepts) {
if (math_errhandling & MATH_ERREXCEPT)
return set_except(excepts);
Expand Down
2 changes: 2 additions & 0 deletions libc/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ add_math_entrypoint_object(canonicalizel)
add_math_entrypoint_object(canonicalizef16)
add_math_entrypoint_object(canonicalizef128)

add_math_entrypoint_object(cbrtf)

add_math_entrypoint_object(ceil)
add_math_entrypoint_object(ceilf)
add_math_entrypoint_object(ceill)
Expand Down
18 changes: 18 additions & 0 deletions libc/src/math/cbrtf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//===-- Implementation header for cbrtf -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_MATH_CBRTF_H
#define LLVM_LIBC_SRC_MATH_CBRTF_H

namespace LIBC_NAMESPACE {

// Computes the cube root of x in single precision.
float cbrtf(float x);

} // namespace LIBC_NAMESPACE

#endif // LLVM_LIBC_SRC_MATH_CBRTF_H
16 changes: 16 additions & 0 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4092,3 +4092,19 @@ add_entrypoint_object(
COMPILE_OPTIONS
-O3
)

# cbrtf entrypoint: built from cbrtf.cpp at -O3. The DEPENDS list mirrors the
# support headers that cbrtf.cpp includes (fenv macros, FEnvImpl, FPBits,
# multiply_add and the optimization macros).
add_entrypoint_object(
cbrtf
SRCS
cbrtf.cpp
HDRS
../cbrtf.h
COMPILE_OPTIONS
-O3
DEPENDS
libc.hdr.fenv_macros
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.macros.optimization
)
157 changes: 157 additions & 0 deletions libc/src/math/generic/cbrtf.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
//===-- Implementation of cbrtf function ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/cbrtf.h"
#include "hdr/fenv_macros.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/common.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY

namespace LIBC_NAMESPACE {

namespace {

// Look up table for 2^(i/3) for i = 0, 1, 2.
// cbrtf indexes it with the biased double-precision exponent of the input
// modulo 3; since the bias 1023 is a multiple of 3, that index is congruent to
// the unbiased exponent modulo 3.
constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0};

// Degree-6 polynomial approximations P_i of ((1 + x)^(1/3) - 1)/x, one for
// each of the 16 subintervals [i/16, (i + 1)/16] covering 0 <= x <= 1.
// COEFFS[i][k] is the coefficient of x^k in P_i (7 coefficients per row).
// Generated by Sollya with:
// > for i from 0 to 15 do {
// P = fpminimax(((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]);
// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",",
// coeff(P, 6), "},");
// };
// Then (1 + x)^(1/3) ~ 1 + x * P_i(x), which is a degree-7 polynomial in x.
constexpr double COEFFS[16][7] = {
{0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5,
-0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6,
0x1.047f75e0aff14p-6},
{0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5,
-0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6,
0x1.716eca1d6e3bcp-7},
{0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5,
-0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6,
0x1.0aeca34893785p-7},
{0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5,
-0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6,
0x1.88410674c6a5dp-8},
{0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5,
-0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7,
0x1.249bb51a1c498p-8},
{0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5,
-0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7,
0x1.ba9230918fa2ep-9},
{0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5,
-0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7,
0x1.52e3e17c71069p-9},
{0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5,
-0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7,
0x1.0677d9af6aad4p-9},
{0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5,
-0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7,
0x1.9ad325dc7adcbp-10},
{0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5,
-0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8,
0x1.449f5ee175c69p-10},
{0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5,
-0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8,
0x1.02cb36389bd79p-10},
{0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5,
-0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8,
0x1.a008ae91f5936p-11},
{0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5,
-0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8,
0x1.50f8174cdb6e9p-11},
{0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5,
-0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9,
0x1.12e0e94e8586dp-11},
{0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5,
-0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9,
0x1.c3726535f1fc6p-12},
{0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5,
-0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9,
0x1.75077abf18d84p-12},
};

} // anonymous namespace

// Cube root of a float, evaluated in double precision:
//   1. Return 0, Inf and NaN inputs unchanged.
//   2. Split the (double) input into exponent and mantissa; the output
//      exponent is derived from exponent / 3 and the remainder selects a
//      2^(k/3) factor from CBRT2.
//   3. Approximate (1 + dx)^(1/3) with the piecewise polynomial in COEFFS
//      chosen by the top 4 mantissa bits.
//   4. If the result is within a few ulps of a value whose low 44 mantissa
//      bits are zero, snap to that value and clear FE_INEXACT.
LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) {
  using FloatBits = typename fputil::FPBits<float>;
  using DoubleBits = typename fputil::FPBits<double>;

  // Masks used by the exactness check at the end.
  constexpr uint64_t LOW_44_BITS = 0xfffffffffff;
  constexpr uint64_t DROP_LOW_5_BITS = 0xffff'ffff'ffff'ffe0;

  const uint32_t x_u = FloatBits(x).uintval();
  const uint32_t x_abs = x_u & 0x7fff'ffff;

  // x is 0, Inf, or NaN: the input is returned as is.
  if (LIBC_UNLIKELY(x_abs == 0 || x_abs >= 0x7f80'0000))
    return x;

  // Sign of x, positioned just above the 11 exponent bits so that it can be
  // merged into the output exponent field below.
  const uint32_t sign_bit = (x_u >> 31) << DoubleBits::EXP_LEN;

  DoubleBits xd_bits(static_cast<double>(x));

  // With the biased double-precision exponent
  //   x_e = real_exponent_of_x + 1023,
  // we have
  //   x_e / 3 = real_exponent_of_x / 3 + 1023 / 3
  //           = real_exponent_of_x / 3 + 341.
  // Adding 1023 - 341 = 682 to the quotient therefore yields the biased
  // exponent of x^(1/3).
  const unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent());
  const unsigned shift_e = x_e % 3;
  const unsigned out_e = (x_e / 3 + 682) | sign_bit;

  // The leading 4 mantissa bits select the polynomial for this subinterval.
  uint64_t x_m = xd_bits.get_mantissa();
  const unsigned idx =
      static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - 4));
  const double *coeffs = COEFFS[idx];

  // Rebuild 1.mantissa with a zero unbiased exponent, then dx = 0.mantissa.
  x_m |= static_cast<uint64_t>(DoubleBits::EXP_BIAS)
         << DoubleBits::FRACTION_LEN;
  const double dx = DoubleBits(x_m).get_val() - 1.0;

  // Estrin-style evaluation of 1 + dx * P(dx).
  const double dx_sq = dx * dx;
  const double dx_4 = dx_sq * dx_sq;

  const double c0 = fputil::multiply_add(dx, coeffs[0], 1.0);
  const double c1 = fputil::multiply_add(dx, coeffs[2], coeffs[1]);
  const double c2 = fputil::multiply_add(dx, coeffs[4], coeffs[3]);
  const double c3 = fputil::multiply_add(dx, coeffs[6], coeffs[5]);

  const double p0 = fputil::multiply_add(dx_sq, c1, c0);
  const double p1 = fputil::multiply_add(dx_sq, c3, c2);

  const double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e];

  uint64_t r_m = DoubleBits(r).get_mantissa();
  // Check if the output is exact. To be exact, the smallest 1-bit of the
  // output has to be at least 2^-7 or higher. So if the lowest 44 bits are
  // within 2^(-52 + 3) of all zeros, the cube root is exact: snap the mantissa
  // to that value and drop the spurious inexact flag.
  if (LIBC_UNLIKELY(((r_m + 8) & LOW_44_BITS) <= 16)) {
    const uint64_t truncated = r_m & DROP_LOW_5_BITS;
    // Slightly above the exact value: truncate. Slightly below: bump up.
    r_m = ((r_m & LOW_44_BITS) <= 8) ? truncated : truncated + 0x20;
    fputil::clear_except_if_required(FE_INEXACT);
  }

  // Adjust exponent and sign.
  const uint64_t r_bits =
      r_m | (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN);

  return static_cast<float>(DoubleBits(r_bits).get_val());
}

} // namespace LIBC_NAMESPACE
12 changes: 12 additions & 0 deletions libc/test/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2213,6 +2213,18 @@ add_fp_unittest(
libc.src.math.f16sqrtl
)

# Unit test for cbrtf (cbrtf_test.cpp) in the libc-math-unittests suite.
# NEED_MPFR is set because the test compares results against MPFR.
add_fp_unittest(
cbrtf_test
NEED_MPFR
SUITE
libc-math-unittests
SRCS
cbrtf_test.cpp
DEPENDS
libc.src.math.cbrtf
libc.src.__support.FPUtil.fp_bits
)

add_subdirectory(generic)
add_subdirectory(smoke)

Expand Down
42 changes: 42 additions & 0 deletions libc/test/src/math/cbrtf_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//===-- Unittests for cbrtf -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "hdr/math_macros.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/math/cbrtf.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"

using LlvmLibcCbrtfTest = LIBC_NAMESPACE::testing::FPTest<float>;

namespace mpfr = LIBC_NAMESPACE::testing::mpfr;

// Checks cbrtf against MPFR, in every rounding mode, on COUNT + 1 evenly
// spaced bit patterns between 0 and the bit pattern of +inf, and on their
// negations.
TEST_F(LlvmLibcCbrtfTest, InFloatRange) {
  constexpr uint32_t COUNT = 100'000;
  const uint32_t STEP = FPBits(inf).uintval() / COUNT;
  for (uint32_t i = 0; i <= COUNT; ++i) {
    // i * STEP never exceeds the bit pattern of +inf, so it cannot overflow.
    float pos = FPBits(static_cast<uint32_t>(i * STEP)).get_val();
    float neg = -pos;
    // The cbrtf calls stay inside the macro arguments so that they are
    // evaluated under each rounding mode the macro switches to.
    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, pos,
                                   LIBC_NAMESPACE::cbrtf(pos), 0.5);
    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, neg,
                                   LIBC_NAMESPACE::cbrtf(neg), 0.5);
  }
}

// Regression inputs for cbrtf, including one subnormal (0x1.bp-143f).
// Each input is checked against MPFR in every rounding mode via
// EXPECT_MPFR_MATCH_ALL_ROUNDING, which also skips modes that cannot be
// selected instead of comparing against a mode that was never applied.
TEST_F(LlvmLibcCbrtfTest, SpecialValues) {
  constexpr float INPUTS[] = {
      0x1.60451p2f, 0x1.31304cp1f, 0x1.d17cp2f, 0x1.bp-143f, 0x1.338cp2f,
  };
  for (float x : INPUTS) {
    // The cbrtf call stays inside the macro argument so that it is evaluated
    // under each rounding mode the macro switches to.
    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, x,
                                   LIBC_NAMESPACE::cbrtf(x), 0.5);
  }
}
16 changes: 16 additions & 0 deletions libc/test/src/math/exhaustive/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -486,3 +486,19 @@ add_fp_unittest(
LINK_LIBRARIES
-lpthread
)

# Exhaustive test for cbrtf in the libc_math_exhaustive_tests suite. It builds
# on the shared .exhaustive_test target and links against pthread.
add_fp_unittest(
cbrtf_test
NO_RUN_POSTBUILD
NEED_MPFR
SUITE
libc_math_exhaustive_tests
SRCS
cbrtf_test.cpp
DEPENDS
.exhaustive_test
libc.src.math.cbrtf
libc.src.__support.FPUtil.fp_bits
LINK_LIBRARIES
-lpthread
)
Loading
Loading