@@ -298,46 +298,40 @@ pub fn vabsq_f64(a: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(abs))]
 pub fn vabs_s64(a: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v1i64"
-        )]
-        fn _vabs_s64(a: int64x1_t) -> int64x1_t;
+    unsafe {
+        let neg: int64x1_t = simd_neg(a);
+        let mask: int64x1_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabs_s64(a) }
 }
 #[doc = "Absolute Value (wrapping)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(abs))]
-pub fn vabsd_s64(a: i64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.i64"
-        )]
-        fn _vabsd_s64(a: i64) -> i64;
+pub fn vabsq_s64(a: int64x2_t) -> int64x2_t {
+    unsafe {
+        let neg: int64x2_t = simd_neg(a);
+        let mask: int64x2_t = simd_ge(a, neg);
+        simd_select(mask, a, neg)
     }
-    unsafe { _vabsd_s64(a) }
 }
 #[doc = "Absolute Value (wrapping)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(abs))]
-pub fn vabsq_s64(a: int64x2_t) -> int64x2_t {
+pub fn vabsd_s64(a: i64) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.abs.v2i64"
+            link_name = "llvm.aarch64.neon.abs.i64"
         )]
-        fn _vabsq_s64(a: int64x2_t) -> int64x2_t;
+        fn _vabsd_s64(a: i64) -> i64;
     }
-    unsafe { _vabsq_s64(a) }
+    unsafe { _vabsd_s64(a) }
 }
 #[doc = "Add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_s64)"]
@@ -604,14 +598,7 @@ pub fn vaddvq_f64(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddv_s32(a: int32x2_t) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i32.v2i32"
-        )]
-        fn _vaddv_s32(a: int32x2_t) -> i32;
-    }
-    unsafe { _vaddv_s32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s8)"]
@@ -620,14 +607,7 @@ pub fn vaddv_s32(a: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_s8(a: int8x8_t) -> i8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i8.v8i8"
-        )]
-        fn _vaddv_s8(a: int8x8_t) -> i8;
-    }
-    unsafe { _vaddv_s8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s8)"]
@@ -636,14 +616,7 @@ pub fn vaddv_s8(a: int8x8_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s8(a: int8x16_t) -> i8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i8.v16i8"
-        )]
-        fn _vaddvq_s8(a: int8x16_t) -> i8;
-    }
-    unsafe { _vaddvq_s8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s16)"]
@@ -652,14 +625,7 @@ pub fn vaddvq_s8(a: int8x16_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_s16(a: int16x4_t) -> i16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i16.v4i16"
-        )]
-        fn _vaddv_s16(a: int16x4_t) -> i16;
-    }
-    unsafe { _vaddv_s16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s16)"]
@@ -668,14 +634,7 @@ pub fn vaddv_s16(a: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s16(a: int16x8_t) -> i16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i16.v8i16"
-        )]
-        fn _vaddvq_s16(a: int16x8_t) -> i16;
-    }
-    unsafe { _vaddvq_s16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s32)"]
@@ -684,14 +643,7 @@ pub fn vaddvq_s16(a: int16x8_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s32(a: int32x4_t) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i32.v4i32"
-        )]
-        fn _vaddvq_s32(a: int32x4_t) -> i32;
-    }
-    unsafe { _vaddvq_s32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u32)"]
@@ -700,14 +652,7 @@ pub fn vaddvq_s32(a: int32x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddv_u32(a: uint32x2_t) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"
-        )]
-        fn _vaddv_u32(a: uint32x2_t) -> u32;
-    }
-    unsafe { _vaddv_u32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u8)"]
@@ -716,14 +661,7 @@ pub fn vaddv_u32(a: uint32x2_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_u8(a: uint8x8_t) -> u8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i8.v8i8"
-        )]
-        fn _vaddv_u8(a: uint8x8_t) -> u8;
-    }
-    unsafe { _vaddv_u8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u8)"]
@@ -732,14 +670,7 @@ pub fn vaddv_u8(a: uint8x8_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i8.v16i8"
-        )]
-        fn _vaddvq_u8(a: uint8x16_t) -> u8;
-    }
-    unsafe { _vaddvq_u8(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u16)"]
@@ -748,14 +679,7 @@ pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_u16(a: uint16x4_t) -> u16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i16.v4i16"
-        )]
-        fn _vaddv_u16(a: uint16x4_t) -> u16;
-    }
-    unsafe { _vaddv_u16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u16)"]
@@ -764,14 +688,7 @@ pub fn vaddv_u16(a: uint16x4_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i16.v8i16"
-        )]
-        fn _vaddvq_u16(a: uint16x8_t) -> u16;
-    }
-    unsafe { _vaddvq_u16(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u32)"]
@@ -780,14 +697,7 @@ pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"
-        )]
-        fn _vaddvq_u32(a: uint32x4_t) -> u32;
-    }
-    unsafe { _vaddvq_u32(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s64)"]
@@ -796,14 +706,7 @@ pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddvq_s64(a: int64x2_t) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddv.i64.v2i64"
-        )]
-        fn _vaddvq_s64(a: int64x2_t) -> i64;
-    }
-    unsafe { _vaddvq_s64(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u64)"]
@@ -812,14 +715,7 @@ pub fn vaddvq_s64(a: int64x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddvq_u64(a: uint64x2_t) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"
-        )]
-        fn _vaddvq_u64(a: uint64x2_t) -> u64;
-    }
-    unsafe { _vaddvq_u64(a) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Multi-vector floating-point absolute maximum"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamax_f32)"]
@@ -15671,23 +15567,11 @@ pub fn vpadds_f32(a: float32x2_t) -> f32 {
 #[doc = "Add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddd_s64(a: int64x2_t) -> i64 {
-    unsafe { transmute(vaddvq_u64(transmute(a))) }
-}
-#[doc = "Add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(addp))]
-pub fn vpaddd_s64(a: int64x2_t) -> i64 {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(vaddvq_u64(transmute(a))) }
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_u64)"]
@@ -15696,7 +15580,7 @@ pub fn vpaddd_s64(a: int64x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
-    vaddvq_u64(a)
+    unsafe { simd_reduce_add_unordered(a) }
 }
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"]