[Neon][RVV] Enable RVV segment load/store only when we have __riscv_zvlsseg flag. #1285

Merged: 2 commits, Feb 26, 2025
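The gate used throughout this diff, SIMDE_ARCH_RISCV_ZVLSSEG, is expected to come from SIMDe's architecture-detection header when the compiler advertises the Zvlsseg extension via the __riscv_zvlsseg test macro named in the title. The detection side is not part of this file's diff; the following is a minimal sketch, assuming it follows the existing simde-arch.h pattern for RISC-V extensions:

/* Hypothetical detection sketch (simde/simde-arch.h style): compilers
 * targeting a Zvlsseg-capable RVV configuration predefine __riscv_zvlsseg. */
#if defined(__riscv_zvlsseg)
  #define SIMDE_ARCH_RISCV_ZVLSSEG 1
#endif

With that gate in place, each __riscv_vlseg2e* call site below compiles only when the toolchain supports segment loads/stores; otherwise the preprocessor condition fails and the portable fallback path is used.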
58 changes: 30 additions & 28 deletions simde/arm/neon/ld2.h
@@ -59,7 +59,7 @@ simde_vld2_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
simde_vget_high_s8(q)
};
return u;
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_int8x8_private a_[2];
vint8m1x2_t dest = __riscv_vlseg2e8_v_i8m1x2(&ptr[0], 8);
a_[0].sv64 = __riscv_vget_v_i8m1x2_i8m1(dest, 0);
@@ -102,7 +102,7 @@ simde_int16x4x2_t
simde_vld2_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_s16(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_int16x4_private a_[2];
vint16m1x2_t dest = __riscv_vlseg2e16_v_i16m1x2(&ptr[0], 4);
a_[0].sv64 = __riscv_vget_v_i16m1x2_i16m1(dest, 0);
@@ -152,7 +152,7 @@ simde_int32x2x2_t
simde_vld2_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_s32(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_int32x2_private a_[2];
vint32m1x2_t dest = __riscv_vlseg2e32_v_i32m1x2(&ptr[0], 2);
a_[0].sv64 = __riscv_vget_v_i32m1x2_i32m1(dest, 0);
@@ -195,7 +195,7 @@ simde_int64x1x2_t
simde_vld2_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_s64(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_int64x1_private a_[2];
vint64m1x2_t dest = __riscv_vlseg2e64_v_i64m1x2(&ptr[0], 1);
a_[0].sv64 = __riscv_vget_v_i64m1x2_i64m1(dest, 0);
@@ -249,7 +249,7 @@ simde_vld2_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
simde_vget_high_u8(q)
};
return u;
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_uint8x8_private a_[2];
vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 8);
a_[0].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 0);
@@ -292,7 +292,7 @@ simde_uint16x4x2_t
simde_vld2_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_u16(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_uint16x4_private a_[2];
vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 4);
a_[0].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 0);
@@ -342,7 +342,7 @@ simde_uint32x2x2_t
simde_vld2_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_u32(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_uint32x2_private a_[2];
vuint32m1x2_t dest = __riscv_vlseg2e32_v_u32m1x2(&ptr[0], 2);
a_[0].sv64 = __riscv_vget_v_u32m1x2_u32m1(dest, 0);
@@ -385,7 +385,7 @@ simde_uint64x1x2_t
simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_u64(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_uint64x1_private a_[2];
vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 1);
a_[0].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 0);
@@ -428,7 +428,8 @@ simde_float16x4x2_t
simde_vld2_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vld2_f16(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \
+  && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
simde_float16x4_private r_[2];
vfloat16m1x2_t dest = __riscv_vlseg2e16_v_f16m1x2((_Float16 *)&ptr[0], 4);
r_[0].sv64 = __riscv_vget_v_f16m1x2_f16m1(dest, 0);
@@ -466,7 +467,7 @@ simde_float32x2x2_t
simde_vld2_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_f32(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_float32x2_private r_[2];
vfloat32m1x2_t dest = __riscv_vlseg2e32_v_f32m1x2(&ptr[0], 2);
r_[0].sv64 = __riscv_vget_v_f32m1x2_f32m1(dest, 0);
@@ -509,7 +510,7 @@ simde_float64x1x2_t
simde_vld2_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld2_f64(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_float64x1_private r_[2];
vfloat64m1x2_t dest = __riscv_vlseg2e64_v_f64m1x2(&ptr[0], 1);
r_[0].sv64 = __riscv_vget_v_f64m1x2_f64m1(dest, 0);
@@ -552,7 +553,7 @@ simde_int8x16x2_t
simde_vld2q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_s8(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_int8x16_private a_[2];
vint8m1x2_t dest = __riscv_vlseg2e8_v_i8m1x2(&ptr[0], 16);
a_[0].sv128 = __riscv_vget_v_i8m1x2_i8m1(dest, 0);
@@ -602,7 +603,7 @@ simde_int32x4x2_t
simde_vld2q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_s32(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_int32x4_private a_[2];
vint32m1x2_t dest = __riscv_vlseg2e32_v_i32m1x2(&ptr[0], 4);
a_[0].sv128 = __riscv_vget_v_i32m1x2_i32m1(dest, 0);
@@ -652,7 +653,7 @@ simde_int16x8x2_t
simde_vld2q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_s16(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_int16x8_private r_[2];
vint16m1x2_t dest = __riscv_vlseg2e16_v_i16m1x2(&ptr[0], 8);
r_[0].sv128 = __riscv_vget_v_i16m1x2_i16m1(dest, 0);
@@ -702,7 +703,7 @@ simde_int64x2x2_t
simde_vld2q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld2q_s64(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_int64x2_private r_[2];
vint64m1x2_t dest = __riscv_vlseg2e64_v_i64m1x2(&ptr[0], 2);
r_[0].sv128 = __riscv_vget_v_i64m1x2_i64m1(dest, 0);
@@ -739,7 +740,7 @@ simde_uint8x16x2_t
simde_vld2q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_u8(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_uint8x16_private r_[2];
vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 16);
r_[0].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 0);
@@ -789,7 +790,7 @@ simde_uint16x8x2_t
simde_vld2q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_u16(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_uint16x8_private r_[2];
vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 8);
r_[0].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 0);
@@ -839,7 +840,7 @@ simde_uint32x4x2_t
simde_vld2q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_u32(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_uint32x4_private r_[2];
vuint32m1x2_t dest = __riscv_vlseg2e32_v_u32m1x2(&ptr[0], 4);
r_[0].sv128 = __riscv_vget_v_u32m1x2_u32m1(dest, 0);
@@ -889,7 +890,7 @@ simde_uint64x2x2_t
simde_vld2q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld2q_u64(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_uint64x2_private r_[2];
vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 2);
r_[0].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 0);
@@ -926,7 +927,8 @@ simde_float16x8x2_t
simde_vld2q_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vld2q_f16(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \
+  && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
simde_float16x8_private r_[2];
vfloat16m1x2_t dest = __riscv_vlseg2e16_v_f16m1x2((_Float16 *)&ptr[0], 8);
r_[0].sv128 = __riscv_vget_v_f16m1x2_f16m1(dest, 0);
@@ -971,7 +973,7 @@ simde_float32x4x2_t
simde_vld2q_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_f32(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_float32x4_private r_[2];
vfloat32m1x2_t dest = __riscv_vlseg2e32_v_f32m1x2(&ptr[0], 4);
r_[0].sv128 = __riscv_vget_v_f32m1x2_f32m1(dest, 0);
@@ -1021,7 +1023,7 @@ simde_float64x2x2_t
simde_vld2q_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld2q_f64(ptr);
-#elif defined(SIMDE_RISCV_V_NATIVE)
+#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
simde_float64x2_private r_[2];
vfloat64m1x2_t dest = __riscv_vlseg2e64_v_f64m1x2(&ptr[0], 2);
r_[0].sv128 = __riscv_vget_v_f64m1x2_f64m1(dest, 0);
@@ -1060,7 +1062,7 @@ simde_vld2_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
return vld2_p8(ptr);
#else
simde_poly8x8_private r_[2];
-#if defined(SIMDE_RISCV_V_NATIVE)
+#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 8);
r_[0].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 0);
r_[1].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 1);
@@ -1095,7 +1097,7 @@ simde_vld2_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
#endif
simde_poly16x4_private r_[2];
-#if defined(SIMDE_RISCV_V_NATIVE)
+#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 4);
r_[0].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 0);
r_[1].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 1);
@@ -1131,7 +1133,7 @@ simde_vld2_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#else
simde_poly64x1_private r_[2];

-#if defined(SIMDE_RISCV_V_NATIVE)
+#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 1);
r_[0].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 0);
r_[1].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 1);
@@ -1168,7 +1170,7 @@ simde_vld2q_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#endif
simde_poly8x16_private r_[2];

-#if defined(SIMDE_RISCV_V_NATIVE)
+#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 16);
r_[0].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 0);
r_[1].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 1);
@@ -1208,7 +1210,7 @@ simde_vld2q_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#endif
simde_poly16x8_private r_[2];

-#if defined(SIMDE_RISCV_V_NATIVE)
+#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 8);
r_[0].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 0);
r_[1].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 1);
@@ -1244,7 +1246,7 @@ simde_vld2q_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#else
simde_poly64x2_private r_[2];

-#if defined(SIMDE_RISCV_V_NATIVE)
+#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 2);
r_[0].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 0);
r_[1].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 1);
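For reference, a hypothetical smoke test (not part of the PR) showing that the guard is transparent to callers: simde_vld2_s8 deinterleaves the same way whether the RVV segment-load path or the portable fallback is selected.

#include <stdio.h>
#include "simde/arm/neon.h"

int main(void) {
  /* 16 interleaved bytes: pairs (0,1), (2,3), ..., (14,15). */
  int8_t data[16];
  for (int i = 0; i < 16; i++) data[i] = (int8_t) i;

  /* vld2 splits even- and odd-indexed elements into two 8-lane vectors.
   * The __riscv_vlseg2e8_v_i8m1x2 path runs only when both
   * SIMDE_RISCV_V_NATIVE and SIMDE_ARCH_RISCV_ZVLSSEG are defined. */
  simde_int8x8x2_t r = simde_vld2_s8(data);

  int8_t even[8], odd[8];
  simde_vst1_s8(even, r.val[0]); /* expected: 0 2 4 6 8 10 12 14 */
  simde_vst1_s8(odd,  r.val[1]); /* expected: 1 3 5 7 9 11 13 15 */
  for (int i = 0; i < 8; i++) printf("%d %d\n", even[i], odd[i]);
  return 0;
}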