From c580ef061c066dc31b34163312b23528a828f2a5 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Fri, 2 May 2025 09:20:13 +0200 Subject: [PATCH 1/3] Use rounding instructions on aarch64 --- libm/src/math/arch/aarch64.rs | 198 ++++++++++++++++++++++++++++++++-- libm/src/math/arch/mod.rs | 15 +++ libm/src/math/ceil.rs | 16 ++- libm/src/math/floor.rs | 16 ++- libm/src/math/round.rs | 18 ++++ libm/src/math/roundeven.rs | 18 ++++ libm/src/math/trunc.rs | 16 ++- 7 files changed, 280 insertions(+), 17 deletions(-) diff --git a/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs index 020bb731c..d099cc6ad 100644 --- a/libm/src/math/arch/aarch64.rs +++ b/libm/src/math/arch/aarch64.rs @@ -30,11 +30,156 @@ pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 { x } +pub fn ceil(mut x: f64) -> f64 { + // SAFETY: `frintp` is available with neon and has no side effects. + unsafe { + asm!( + "frintp {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn ceilf(mut x: f32) -> f32 { + // SAFETY: `frintp` is available with neon and has no side effects. + unsafe { + asm!( + "frintp {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn ceilf16(mut x: f16) -> f16 { + // SAFETY: `frintp` is available for `f16` with `fp16` (implies `neon`) and has no side effects. + unsafe { + asm!( + "frintp {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn floor(mut x: f64) -> f64 { + // SAFETY: `frintm` is available with neon and has no side effects. + unsafe { + asm!( + "frintm {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn floorf(mut x: f32) -> f32 { + // SAFETY: `frintm` is available with neon and has no side effects. + unsafe { + asm!( + "frintm {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn floorf16(mut x: f16) -> f16 { + // SAFETY: `frintm` is available for `f16` with `fp16` (implies `neon`) and has no side effects. + unsafe { + asm!( + "frintm {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + pub fn rint(mut x: f64) -> f64 { + // SAFETY: `frintx` is available with neon and has no side effects. + unsafe { + asm!( + "frintx {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn rintf(mut x: f32) -> f32 { + // SAFETY: `frintx` is available with neon and has no side effects. + unsafe { + asm!( + "frintx {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn rintf16(mut x: f16) -> f16 { + // SAFETY: `frintx` is available for `f16` with `fp16` (implies `neon`) and has no side effects. + unsafe { + asm!( + "frintx {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn round(mut x: f64) -> f64 { + // SAFETY: `frinta` is available with neon and has no side effects. + unsafe { + asm!( + "frinta {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn roundf(mut x: f32) -> f32 { + // SAFETY: `frinta` is available with neon and has no side effects. + unsafe { + asm!( + "frinta {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn roundf16(mut x: f16) -> f16 { + // SAFETY: `frinta` is available for `f16` with `fp16` (implies `neon`) and has no side effects. + unsafe { + asm!( + "frinta {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn roundeven(mut x: f64) -> f64 { // SAFETY: `frintn` is available with neon and has no side effects. - // - // `frintn` is always round-to-nearest which does not match the C specification, but Rust does - // not support rounding modes. unsafe { asm!( "frintn {x:d}, {x:d}", @@ -45,11 +190,8 @@ pub fn rint(mut x: f64) -> f64 { x } -pub fn rintf(mut x: f32) -> f32 { +pub fn roundevenf(mut x: f32) -> f32 { // SAFETY: `frintn` is available with neon and has no side effects. - // - // `frintn` is always round-to-nearest which does not match the C specification, but Rust does - // not support rounding modes. unsafe { asm!( "frintn {x:s}, {x:s}", @@ -61,11 +203,8 @@ pub fn rintf(mut x: f32) -> f32 { } #[cfg(all(f16_enabled, target_feature = "fp16"))] -pub fn rintf16(mut x: f16) -> f16 { +pub fn roundevenf16(mut x: f16) -> f16 { // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects. - // - // `frintn` is always round-to-nearest which does not match the C specification, but Rust does - // not support rounding modes. unsafe { asm!( "frintn {x:h}, {x:h}", @@ -76,6 +215,43 @@ pub fn rintf16(mut x: f16) -> f16 { x } +pub fn trunc(mut x: f64) -> f64 { + // SAFETY: `frintz` is available with neon and has no side effects. + unsafe { + asm!( + "frintz {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn truncf(mut x: f32) -> f32 { + // SAFETY: `frintz` is available with neon and has no side effects. + unsafe { + asm!( + "frintz {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn truncf16(mut x: f16) -> f16 { + // SAFETY: `frintz` is available for `f16` with `fp16` (implies `neon`) and has no side effects. + unsafe { + asm!( + "frintz {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + pub fn sqrt(mut x: f64) -> f64 { // SAFETY: `fsqrt` is available with neon and has no side effects. unsafe { diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index d9f2aad66..ad8a950cb 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -26,15 +26,30 @@ cfg_if! { pub use aarch64::{ fma, fmaf, + ceil, + ceilf, + floor, + floorf, + round, + roundf, rint, rintf, + roundeven, + roundevenf, + trun, + truncf sqrt, sqrtf, }; #[cfg(all(f16_enabled, target_feature = "fp16"))] pub use aarch64::{ + ceilf16, + floorf16, + roundf16, rintf16, + roundevenf16, + truncf16 sqrtf16, }; } diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs index 4e1035457..47052f88a 100644 --- a/libm/src/math/ceil.rs +++ b/libm/src/math/ceil.rs @@ -4,6 +4,12 @@ #[cfg(f16_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceilf16(x: f16) -> f16 { + select_implementation! { + name: ceilf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + super::generic::ceil(x) } @@ -14,7 +20,10 @@ pub fn ceilf16(x: f16) -> f16 { pub fn ceilf(x: f32) -> f32 { select_implementation! { name: ceilf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), args: x, } @@ -28,7 +37,10 @@ pub fn ceilf(x: f32) -> f32 { pub fn ceil(x: f64) -> f64 { select_implementation! { name: ceil, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), args: x, } diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs index 3c5eab101..52efb0a3c 100644 --- a/libm/src/math/floor.rs +++ b/libm/src/math/floor.rs @@ -4,6 +4,12 @@ #[cfg(f16_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floorf16(x: f16) -> f16 { + select_implementation! { + name: floorf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + return super::generic::floor(x); } @@ -14,7 +20,10 @@ pub fn floorf16(x: f16) -> f16 { pub fn floor(x: f64) -> f64 { select_implementation! { name: floor, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), args: x, } @@ -29,7 +38,10 @@ pub fn floor(x: f64) -> f64 { pub fn floorf(x: f32) -> f32 { select_implementation! { name: floorf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), args: x, } diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index 6cd091cd7..df10c5563 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -2,18 +2,36 @@ #[cfg(f16_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundf16(x: f16) -> f16 { + select_implementation! { + name: roundf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + super::generic::round(x) } /// Round `x` to the nearest integer, breaking ties away from zero. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundf(x: f32) -> f32 { + select_implementation! { + name: roundf, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + super::generic::round(x) } /// Round `x` to the nearest integer, breaking ties away from zero. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn round(x: f64) -> f64 { + select_implementation! { + name: round, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + super::generic::round(x) } diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs index 6e621d762..f5916ec4f 100644 --- a/libm/src/math/roundeven.rs +++ b/libm/src/math/roundeven.rs @@ -5,6 +5,12 @@ use super::support::{Float, Round}; #[cfg(f16_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundevenf16(x: f16) -> f16 { + select_implementation! { + name: roundevenf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + roundeven_impl(x) } @@ -12,6 +18,12 @@ pub fn roundevenf16(x: f16) -> f16 { /// `roundToIntegralTiesToEven`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundevenf(x: f32) -> f32 { + select_implementation! { + name: roundevenf, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + roundeven_impl(x) } @@ -19,6 +31,12 @@ pub fn roundevenf(x: f32) -> f32 { /// `roundToIntegralTiesToEven`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundeven(x: f64) -> f64 { + select_implementation! { + name: roundeven, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + roundeven_impl(x) } diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs index fa50d55e1..ab87b2ae0 100644 --- a/libm/src/math/trunc.rs +++ b/libm/src/math/trunc.rs @@ -4,6 +4,12 @@ #[cfg(f16_enabled)] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn truncf16(x: f16) -> f16 { + select_implementation! { + name: truncf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + super::generic::trunc(x) } @@ -14,7 +20,10 @@ pub fn truncf16(x: f16) -> f16 { pub fn truncf(x: f32) -> f32 { select_implementation! { name: truncf, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), args: x, } @@ -28,7 +37,10 @@ pub fn truncf(x: f32) -> f32 { pub fn trunc(x: f64) -> f64 { select_implementation! { name: trunc, - use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), args: x, } From 40b4180db48de910531cbaa5b237279150badbac Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Fri, 2 May 2025 09:30:29 +0200 Subject: [PATCH 2/3] export fix --- libm/src/math/arch/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs index ad8a950cb..248ab9dbf 100644 --- a/libm/src/math/arch/mod.rs +++ b/libm/src/math/arch/mod.rs @@ -36,8 +36,8 @@ cfg_if! { rintf, roundeven, roundevenf, - trun, - truncf + trunc, + truncf, sqrt, sqrtf, }; @@ -49,7 +49,7 @@ cfg_if! { roundf16, rintf16, roundevenf16, - truncf16 + truncf16, sqrtf16, }; } From ca8629e2c802c004f45ed61e3ea34344ab1beb18 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Fri, 2 May 2025 09:58:35 +0200 Subject: [PATCH 3/3] final fix --- etc/function-definitions.json | 15 +++++++++++++++ libm/src/math/round.rs | 4 ++-- libm/src/math/roundeven.rs | 4 ++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/etc/function-definitions.json b/etc/function-definitions.json index 9e5774eaf..518fdf35c 100644 --- a/etc/function-definitions.json +++ b/etc/function-definitions.json @@ -98,6 +98,7 @@ }, "ceil": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/arch/i586.rs", "libm/src/math/arch/wasm32.rs", "libm/src/math/ceil.rs", @@ -107,6 +108,7 @@ }, "ceilf": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/arch/wasm32.rs", "libm/src/math/ceil.rs", "libm/src/math/generic/ceil.rs" @@ -122,6 +124,7 @@ }, "ceilf16": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/ceil.rs", "libm/src/math/generic/ceil.rs" ], @@ -311,6 +314,7 @@ }, "floor": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/arch/i586.rs", "libm/src/math/arch/wasm32.rs", "libm/src/math/floor.rs", @@ -320,6 +324,7 @@ }, "floorf": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/arch/wasm32.rs", "libm/src/math/floor.rs", "libm/src/math/generic/floor.rs" @@ -335,6 +340,7 @@ }, "floorf16": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/floor.rs", "libm/src/math/generic/floor.rs" ], @@ -815,6 +821,7 @@ }, "round": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/generic/round.rs", "libm/src/math/round.rs" ], @@ -822,12 +829,14 @@ }, "roundeven": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/roundeven.rs" ], "type": "f64" }, "roundevenf": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/roundeven.rs" ], "type": "f32" @@ -840,12 +849,14 @@ }, "roundevenf16": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/roundeven.rs" ], "type": "f16" }, "roundf": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/generic/round.rs", "libm/src/math/round.rs" ], @@ -860,6 +871,7 @@ }, "roundf16": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/generic/round.rs", "libm/src/math/round.rs" ], @@ -1002,6 +1014,7 @@ }, "trunc": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/arch/wasm32.rs", "libm/src/math/generic/trunc.rs", "libm/src/math/trunc.rs" @@ -1010,6 +1023,7 @@ }, "truncf": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/arch/wasm32.rs", "libm/src/math/generic/trunc.rs", "libm/src/math/trunc.rs" @@ -1025,6 +1039,7 @@ }, "truncf16": { "sources": [ + "libm/src/math/arch/aarch64.rs", "libm/src/math/generic/trunc.rs", "libm/src/math/trunc.rs" ], diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs index df10c5563..335857cb9 100644 --- a/libm/src/math/round.rs +++ b/libm/src/math/round.rs @@ -16,7 +16,7 @@ pub fn roundf16(x: f16) -> f16 { pub fn roundf(x: f32) -> f32 { select_implementation! { name: roundf, - use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + use_arch: all(target_arch = "aarch64", target_feature = "neon"), args: x, } @@ -28,7 +28,7 @@ pub fn roundf(x: f32) -> f32 { pub fn round(x: f64) -> f64 { select_implementation! { name: round, - use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + use_arch: all(target_arch = "aarch64", target_feature = "neon"), args: x, } diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs index f5916ec4f..1a2e8ef99 100644 --- a/libm/src/math/roundeven.rs +++ b/libm/src/math/roundeven.rs @@ -20,7 +20,7 @@ pub fn roundevenf16(x: f16) -> f16 { pub fn roundevenf(x: f32) -> f32 { select_implementation! { name: roundevenf, - use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + use_arch: all(target_arch = "aarch64", target_feature = "neon"), args: x, } @@ -33,7 +33,7 @@ pub fn roundevenf(x: f32) -> f32 { pub fn roundeven(x: f64) -> f64 { select_implementation! { name: roundeven, - use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + use_arch: all(target_arch = "aarch64", target_feature = "neon"), args: x, }