Skip to content

Commit c6b2f48

Browse files
authored
Merge pull request #1872 from folkertdev/aarch64-horizontal-add
`aarch64`: use `intrinsics::simd` for horizontal add and `abs`
2 parents d22c313 + 06ff4f9 commit c6b2f48

File tree

4 files changed

+102
-244
lines changed

4 files changed

+102
-244
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 31 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -298,46 +298,40 @@ pub fn vabsq_f64(a: float64x2_t) -> float64x2_t {
298298
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
299299
#[cfg_attr(test, assert_instr(abs))]
300300
pub fn vabs_s64(a: int64x1_t) -> int64x1_t {
301-
unsafe extern "unadjusted" {
302-
#[cfg_attr(
303-
any(target_arch = "aarch64", target_arch = "arm64ec"),
304-
link_name = "llvm.aarch64.neon.abs.v1i64"
305-
)]
306-
fn _vabs_s64(a: int64x1_t) -> int64x1_t;
301+
unsafe {
302+
let neg: int64x1_t = simd_neg(a);
303+
let mask: int64x1_t = simd_ge(a, neg);
304+
simd_select(mask, a, neg)
307305
}
308-
unsafe { _vabs_s64(a) }
309306
}
310307
#[doc = "Absolute Value (wrapping)."]
311-
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
308+
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
312309
#[inline]
313310
#[target_feature(enable = "neon")]
314311
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
315312
#[cfg_attr(test, assert_instr(abs))]
316-
pub fn vabsd_s64(a: i64) -> i64 {
317-
unsafe extern "unadjusted" {
318-
#[cfg_attr(
319-
any(target_arch = "aarch64", target_arch = "arm64ec"),
320-
link_name = "llvm.aarch64.neon.abs.i64"
321-
)]
322-
fn _vabsd_s64(a: i64) -> i64;
313+
pub fn vabsq_s64(a: int64x2_t) -> int64x2_t {
314+
unsafe {
315+
let neg: int64x2_t = simd_neg(a);
316+
let mask: int64x2_t = simd_ge(a, neg);
317+
simd_select(mask, a, neg)
323318
}
324-
unsafe { _vabsd_s64(a) }
325319
}
326320
#[doc = "Absolute Value (wrapping)."]
327-
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
321+
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
328322
#[inline]
329323
#[target_feature(enable = "neon")]
330324
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
331325
#[cfg_attr(test, assert_instr(abs))]
332-
pub fn vabsq_s64(a: int64x2_t) -> int64x2_t {
326+
pub fn vabsd_s64(a: i64) -> i64 {
333327
unsafe extern "unadjusted" {
334328
#[cfg_attr(
335329
any(target_arch = "aarch64", target_arch = "arm64ec"),
336-
link_name = "llvm.aarch64.neon.abs.v2i64"
330+
link_name = "llvm.aarch64.neon.abs.i64"
337331
)]
338-
fn _vabsq_s64(a: int64x2_t) -> int64x2_t;
332+
fn _vabsd_s64(a: i64) -> i64;
339333
}
340-
unsafe { _vabsq_s64(a) }
334+
unsafe { _vabsd_s64(a) }
341335
}
342336
#[doc = "Add"]
343337
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_s64)"]
@@ -604,14 +598,7 @@ pub fn vaddvq_f64(a: float64x2_t) -> f64 {
604598
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
605599
#[cfg_attr(test, assert_instr(addp))]
606600
pub fn vaddv_s32(a: int32x2_t) -> i32 {
607-
unsafe extern "unadjusted" {
608-
#[cfg_attr(
609-
any(target_arch = "aarch64", target_arch = "arm64ec"),
610-
link_name = "llvm.aarch64.neon.saddv.i32.v2i32"
611-
)]
612-
fn _vaddv_s32(a: int32x2_t) -> i32;
613-
}
614-
unsafe { _vaddv_s32(a) }
601+
unsafe { simd_reduce_add_unordered(a) }
615602
}
616603
#[doc = "Add across vector"]
617604
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s8)"]
@@ -620,14 +607,7 @@ pub fn vaddv_s32(a: int32x2_t) -> i32 {
620607
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
621608
#[cfg_attr(test, assert_instr(addv))]
622609
pub fn vaddv_s8(a: int8x8_t) -> i8 {
623-
unsafe extern "unadjusted" {
624-
#[cfg_attr(
625-
any(target_arch = "aarch64", target_arch = "arm64ec"),
626-
link_name = "llvm.aarch64.neon.saddv.i8.v8i8"
627-
)]
628-
fn _vaddv_s8(a: int8x8_t) -> i8;
629-
}
630-
unsafe { _vaddv_s8(a) }
610+
unsafe { simd_reduce_add_unordered(a) }
631611
}
632612
#[doc = "Add across vector"]
633613
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s8)"]
@@ -636,14 +616,7 @@ pub fn vaddv_s8(a: int8x8_t) -> i8 {
636616
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
637617
#[cfg_attr(test, assert_instr(addv))]
638618
pub fn vaddvq_s8(a: int8x16_t) -> i8 {
639-
unsafe extern "unadjusted" {
640-
#[cfg_attr(
641-
any(target_arch = "aarch64", target_arch = "arm64ec"),
642-
link_name = "llvm.aarch64.neon.saddv.i8.v16i8"
643-
)]
644-
fn _vaddvq_s8(a: int8x16_t) -> i8;
645-
}
646-
unsafe { _vaddvq_s8(a) }
619+
unsafe { simd_reduce_add_unordered(a) }
647620
}
648621
#[doc = "Add across vector"]
649622
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s16)"]
@@ -652,14 +625,7 @@ pub fn vaddvq_s8(a: int8x16_t) -> i8 {
652625
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
653626
#[cfg_attr(test, assert_instr(addv))]
654627
pub fn vaddv_s16(a: int16x4_t) -> i16 {
655-
unsafe extern "unadjusted" {
656-
#[cfg_attr(
657-
any(target_arch = "aarch64", target_arch = "arm64ec"),
658-
link_name = "llvm.aarch64.neon.saddv.i16.v4i16"
659-
)]
660-
fn _vaddv_s16(a: int16x4_t) -> i16;
661-
}
662-
unsafe { _vaddv_s16(a) }
628+
unsafe { simd_reduce_add_unordered(a) }
663629
}
664630
#[doc = "Add across vector"]
665631
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s16)"]
@@ -668,14 +634,7 @@ pub fn vaddv_s16(a: int16x4_t) -> i16 {
668634
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
669635
#[cfg_attr(test, assert_instr(addv))]
670636
pub fn vaddvq_s16(a: int16x8_t) -> i16 {
671-
unsafe extern "unadjusted" {
672-
#[cfg_attr(
673-
any(target_arch = "aarch64", target_arch = "arm64ec"),
674-
link_name = "llvm.aarch64.neon.saddv.i16.v8i16"
675-
)]
676-
fn _vaddvq_s16(a: int16x8_t) -> i16;
677-
}
678-
unsafe { _vaddvq_s16(a) }
637+
unsafe { simd_reduce_add_unordered(a) }
679638
}
680639
#[doc = "Add across vector"]
681640
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s32)"]
@@ -684,14 +643,7 @@ pub fn vaddvq_s16(a: int16x8_t) -> i16 {
684643
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
685644
#[cfg_attr(test, assert_instr(addv))]
686645
pub fn vaddvq_s32(a: int32x4_t) -> i32 {
687-
unsafe extern "unadjusted" {
688-
#[cfg_attr(
689-
any(target_arch = "aarch64", target_arch = "arm64ec"),
690-
link_name = "llvm.aarch64.neon.saddv.i32.v4i32"
691-
)]
692-
fn _vaddvq_s32(a: int32x4_t) -> i32;
693-
}
694-
unsafe { _vaddvq_s32(a) }
646+
unsafe { simd_reduce_add_unordered(a) }
695647
}
696648
#[doc = "Add across vector"]
697649
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u32)"]
@@ -700,14 +652,7 @@ pub fn vaddvq_s32(a: int32x4_t) -> i32 {
700652
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
701653
#[cfg_attr(test, assert_instr(addp))]
702654
pub fn vaddv_u32(a: uint32x2_t) -> u32 {
703-
unsafe extern "unadjusted" {
704-
#[cfg_attr(
705-
any(target_arch = "aarch64", target_arch = "arm64ec"),
706-
link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"
707-
)]
708-
fn _vaddv_u32(a: uint32x2_t) -> u32;
709-
}
710-
unsafe { _vaddv_u32(a) }
655+
unsafe { simd_reduce_add_unordered(a) }
711656
}
712657
#[doc = "Add across vector"]
713658
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u8)"]
@@ -716,14 +661,7 @@ pub fn vaddv_u32(a: uint32x2_t) -> u32 {
716661
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
717662
#[cfg_attr(test, assert_instr(addv))]
718663
pub fn vaddv_u8(a: uint8x8_t) -> u8 {
719-
unsafe extern "unadjusted" {
720-
#[cfg_attr(
721-
any(target_arch = "aarch64", target_arch = "arm64ec"),
722-
link_name = "llvm.aarch64.neon.uaddv.i8.v8i8"
723-
)]
724-
fn _vaddv_u8(a: uint8x8_t) -> u8;
725-
}
726-
unsafe { _vaddv_u8(a) }
664+
unsafe { simd_reduce_add_unordered(a) }
727665
}
728666
#[doc = "Add across vector"]
729667
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u8)"]
@@ -732,14 +670,7 @@ pub fn vaddv_u8(a: uint8x8_t) -> u8 {
732670
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
733671
#[cfg_attr(test, assert_instr(addv))]
734672
pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
735-
unsafe extern "unadjusted" {
736-
#[cfg_attr(
737-
any(target_arch = "aarch64", target_arch = "arm64ec"),
738-
link_name = "llvm.aarch64.neon.uaddv.i8.v16i8"
739-
)]
740-
fn _vaddvq_u8(a: uint8x16_t) -> u8;
741-
}
742-
unsafe { _vaddvq_u8(a) }
673+
unsafe { simd_reduce_add_unordered(a) }
743674
}
744675
#[doc = "Add across vector"]
745676
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u16)"]
@@ -748,14 +679,7 @@ pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
748679
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
749680
#[cfg_attr(test, assert_instr(addv))]
750681
pub fn vaddv_u16(a: uint16x4_t) -> u16 {
751-
unsafe extern "unadjusted" {
752-
#[cfg_attr(
753-
any(target_arch = "aarch64", target_arch = "arm64ec"),
754-
link_name = "llvm.aarch64.neon.uaddv.i16.v4i16"
755-
)]
756-
fn _vaddv_u16(a: uint16x4_t) -> u16;
757-
}
758-
unsafe { _vaddv_u16(a) }
682+
unsafe { simd_reduce_add_unordered(a) }
759683
}
760684
#[doc = "Add across vector"]
761685
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u16)"]
@@ -764,14 +688,7 @@ pub fn vaddv_u16(a: uint16x4_t) -> u16 {
764688
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
765689
#[cfg_attr(test, assert_instr(addv))]
766690
pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
767-
unsafe extern "unadjusted" {
768-
#[cfg_attr(
769-
any(target_arch = "aarch64", target_arch = "arm64ec"),
770-
link_name = "llvm.aarch64.neon.uaddv.i16.v8i16"
771-
)]
772-
fn _vaddvq_u16(a: uint16x8_t) -> u16;
773-
}
774-
unsafe { _vaddvq_u16(a) }
691+
unsafe { simd_reduce_add_unordered(a) }
775692
}
776693
#[doc = "Add across vector"]
777694
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u32)"]
@@ -780,14 +697,7 @@ pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
780697
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
781698
#[cfg_attr(test, assert_instr(addv))]
782699
pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
783-
unsafe extern "unadjusted" {
784-
#[cfg_attr(
785-
any(target_arch = "aarch64", target_arch = "arm64ec"),
786-
link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"
787-
)]
788-
fn _vaddvq_u32(a: uint32x4_t) -> u32;
789-
}
790-
unsafe { _vaddvq_u32(a) }
700+
unsafe { simd_reduce_add_unordered(a) }
791701
}
792702
#[doc = "Add across vector"]
793703
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s64)"]
@@ -796,14 +706,7 @@ pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
796706
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
797707
#[cfg_attr(test, assert_instr(addp))]
798708
pub fn vaddvq_s64(a: int64x2_t) -> i64 {
799-
unsafe extern "unadjusted" {
800-
#[cfg_attr(
801-
any(target_arch = "aarch64", target_arch = "arm64ec"),
802-
link_name = "llvm.aarch64.neon.saddv.i64.v2i64"
803-
)]
804-
fn _vaddvq_s64(a: int64x2_t) -> i64;
805-
}
806-
unsafe { _vaddvq_s64(a) }
709+
unsafe { simd_reduce_add_unordered(a) }
807710
}
808711
#[doc = "Add across vector"]
809712
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u64)"]
@@ -812,14 +715,7 @@ pub fn vaddvq_s64(a: int64x2_t) -> i64 {
812715
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
813716
#[cfg_attr(test, assert_instr(addp))]
814717
pub fn vaddvq_u64(a: uint64x2_t) -> u64 {
815-
unsafe extern "unadjusted" {
816-
#[cfg_attr(
817-
any(target_arch = "aarch64", target_arch = "arm64ec"),
818-
link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"
819-
)]
820-
fn _vaddvq_u64(a: uint64x2_t) -> u64;
821-
}
822-
unsafe { _vaddvq_u64(a) }
718+
unsafe { simd_reduce_add_unordered(a) }
823719
}
824720
#[doc = "Multi-vector floating-point absolute maximum"]
825721
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamax_f32)"]
@@ -15671,23 +15567,11 @@ pub fn vpadds_f32(a: float32x2_t) -> f32 {
1567115567
#[doc = "Add pairwise"]
1567215568
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
1567315569
#[inline]
15674-
#[cfg(target_endian = "little")]
1567515570
#[target_feature(enable = "neon")]
1567615571
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1567715572
#[cfg_attr(test, assert_instr(addp))]
1567815573
pub fn vpaddd_s64(a: int64x2_t) -> i64 {
15679-
unsafe { transmute(vaddvq_u64(transmute(a))) }
15680-
}
15681-
#[doc = "Add pairwise"]
15682-
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
15683-
#[inline]
15684-
#[cfg(target_endian = "big")]
15685-
#[target_feature(enable = "neon")]
15686-
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
15687-
#[cfg_attr(test, assert_instr(addp))]
15688-
pub fn vpaddd_s64(a: int64x2_t) -> i64 {
15689-
let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
15690-
unsafe { transmute(vaddvq_u64(transmute(a))) }
15574+
unsafe { simd_reduce_add_unordered(a) }
1569115575
}
1569215576
#[doc = "Add pairwise"]
1569315577
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_u64)"]
@@ -15696,7 +15580,7 @@ pub fn vpaddd_s64(a: int64x2_t) -> i64 {
1569615580
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1569715581
#[cfg_attr(test, assert_instr(addp))]
1569815582
pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
15699-
vaddvq_u64(a)
15583+
unsafe { simd_reduce_add_unordered(a) }
1570015584
}
1570115585
#[doc = "Floating-point add pairwise"]
1570215586
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"]

0 commit comments

Comments (0)