diff --git a/simde/arm/neon/ld2.h b/simde/arm/neon/ld2.h index f598f112a..b22c80c05 100644 --- a/simde/arm/neon/ld2.h +++ b/simde/arm/neon/ld2.h @@ -59,7 +59,7 @@ simde_vld2_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { simde_vget_high_s8(q) }; return u; - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int8x8_private a_[2]; vint8m1x2_t dest = __riscv_vlseg2e8_v_i8m1x2(&ptr[0], 8); a_[0].sv64 = __riscv_vget_v_i8m1x2_i8m1(dest, 0); @@ -102,7 +102,7 @@ simde_int16x4x2_t simde_vld2_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2_s16(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int16x4_private a_[2]; vint16m1x2_t dest = __riscv_vlseg2e16_v_i16m1x2(&ptr[0], 4); a_[0].sv64 = __riscv_vget_v_i16m1x2_i16m1(dest, 0); @@ -152,7 +152,7 @@ simde_int32x2x2_t simde_vld2_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2_s32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int32x2_private a_[2]; vint32m1x2_t dest = __riscv_vlseg2e32_v_i32m1x2(&ptr[0], 2); a_[0].sv64 = __riscv_vget_v_i32m1x2_i32m1(dest, 0); @@ -195,7 +195,7 @@ simde_int64x1x2_t simde_vld2_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2_s64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int64x1_private a_[2]; vint64m1x2_t dest = __riscv_vlseg2e64_v_i64m1x2(&ptr[0], 1); a_[0].sv64 = __riscv_vget_v_i64m1x2_i64m1(dest, 0); @@ -249,7 +249,7 @@ simde_vld2_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { simde_vget_high_u8(q) }; return u; - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint8x8_private a_[2]; vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 8); a_[0].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 0); @@ -292,7 +292,7 @@ simde_uint16x4x2_t simde_vld2_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2_u16(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint16x4_private a_[2]; vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 4); a_[0].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 0); @@ -342,7 +342,7 @@ simde_uint32x2x2_t simde_vld2_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2_u32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint32x2_private a_[2]; vuint32m1x2_t dest = __riscv_vlseg2e32_v_u32m1x2(&ptr[0], 2); a_[0].sv64 = __riscv_vget_v_u32m1x2_u32m1(dest, 0); @@ -385,7 +385,7 @@ simde_uint64x1x2_t simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2_u64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint64x1_private a_[2]; vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 1); a_[0].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 0); @@ -428,7 +428,8 @@ simde_float16x4x2_t simde_vld2_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) 
return vld2_f16(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \ + && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) simde_float16x4_private r_[2]; vfloat16m1x2_t dest = __riscv_vlseg2e16_v_f16m1x2((_Float16 *)&ptr[0], 4); r_[0].sv64 = __riscv_vget_v_f16m1x2_f16m1(dest, 0); @@ -466,7 +467,7 @@ simde_float32x2x2_t simde_vld2_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2_f32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_float32x2_private r_[2]; vfloat32m1x2_t dest = __riscv_vlseg2e32_v_f32m1x2(&ptr[0], 2); r_[0].sv64 = __riscv_vget_v_f32m1x2_f32m1(dest, 0); @@ -509,7 +510,7 @@ simde_float64x1x2_t simde_vld2_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld2_f64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_float64x1_private r_[2]; vfloat64m1x2_t dest = __riscv_vlseg2e64_v_f64m1x2(&ptr[0], 1); r_[0].sv64 = __riscv_vget_v_f64m1x2_f64m1(dest, 0); @@ -552,7 +553,7 @@ simde_int8x16x2_t simde_vld2q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2q_s8(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int8x16_private a_[2]; vint8m1x2_t dest = __riscv_vlseg2e8_v_i8m1x2(&ptr[0], 16); a_[0].sv128 = __riscv_vget_v_i8m1x2_i8m1(dest, 0); @@ -602,7 +603,7 @@ simde_int32x4x2_t simde_vld2q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2q_s32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int32x4_private a_[2]; vint32m1x2_t dest = __riscv_vlseg2e32_v_i32m1x2(&ptr[0], 4); a_[0].sv128 = __riscv_vget_v_i32m1x2_i32m1(dest, 0); @@ -652,7 +653,7 @@ simde_int16x8x2_t simde_vld2q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2q_s16(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int16x8_private r_[2]; vint16m1x2_t dest = __riscv_vlseg2e16_v_i16m1x2(&ptr[0], 8); r_[0].sv128 = __riscv_vget_v_i16m1x2_i16m1(dest, 0); @@ -702,7 +703,7 @@ simde_int64x2x2_t simde_vld2q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld2q_s64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int64x2_private r_[2]; vint64m1x2_t dest = __riscv_vlseg2e64_v_i64m1x2(&ptr[0], 2); r_[0].sv128 = __riscv_vget_v_i64m1x2_i64m1(dest, 0); @@ -739,7 +740,7 @@ simde_uint8x16x2_t simde_vld2q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2q_u8(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint8x16_private r_[2]; vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 16); r_[0].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 0); @@ -789,7 +790,7 @@ simde_uint16x8x2_t simde_vld2q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2q_u16(ptr); - #elif 
defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint16x8_private r_[2]; vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 8); r_[0].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 0); @@ -839,7 +840,7 @@ simde_uint32x4x2_t simde_vld2q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2q_u32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint32x4_private r_[2]; vuint32m1x2_t dest = __riscv_vlseg2e32_v_u32m1x2(&ptr[0], 4); r_[0].sv128 = __riscv_vget_v_u32m1x2_u32m1(dest, 0); @@ -889,7 +890,7 @@ simde_uint64x2x2_t simde_vld2q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld2q_u64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint64x2_private r_[2]; vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 2); r_[0].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 0); @@ -926,7 +927,8 @@ simde_float16x8x2_t simde_vld2q_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld2q_f16(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \ + && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) simde_float16x8_private r_[2]; vfloat16m1x2_t dest = __riscv_vlseg2e16_v_f16m1x2((_Float16 *)&ptr[0], 8); r_[0].sv128 = __riscv_vget_v_f16m1x2_f16m1(dest, 0); @@ -971,7 +973,7 @@ simde_float32x4x2_t simde_vld2q_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld2q_f32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_float32x4_private r_[2]; vfloat32m1x2_t dest = __riscv_vlseg2e32_v_f32m1x2(&ptr[0], 4); r_[0].sv128 = __riscv_vget_v_f32m1x2_f32m1(dest, 0); @@ -1021,7 +1023,7 @@ simde_float64x2x2_t simde_vld2q_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld2q_f64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_float64x2_private r_[2]; vfloat64m1x2_t dest = __riscv_vlseg2e64_v_f64m1x2(&ptr[0], 2); r_[0].sv128 = __riscv_vget_v_f64m1x2_f64m1(dest, 0); @@ -1060,7 +1062,7 @@ simde_vld2_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { return vld2_p8(ptr); #else simde_poly8x8_private r_[2]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 8); r_[0].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 1); @@ -1095,7 +1097,7 @@ simde_vld2_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif simde_poly16x4_private r_[2]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 4); r_[0].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 1); @@ -1131,7 +1133,7 @@ simde_vld2_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #else simde_poly64x1_private r_[2]; - #if 
defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 1); r_[0].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 1); @@ -1168,7 +1170,7 @@ simde_vld2q_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { #endif simde_poly8x16_private r_[2]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 16); r_[0].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 0); r_[1].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 1); @@ -1208,7 +1210,7 @@ simde_vld2q_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { #endif simde_poly16x8_private r_[2]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 8); r_[0].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 0); r_[1].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 1); @@ -1244,7 +1246,7 @@ simde_vld2q_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #else simde_poly64x2_private r_[2]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 2); r_[0].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 0); r_[1].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 1); diff --git a/simde/arm/neon/ld3.h b/simde/arm/neon/ld3.h index a102f2eda..eddc86a51 100644 --- a/simde/arm/neon/ld3.h +++ b/simde/arm/neon/ld3.h @@ -49,7 +49,8 @@ simde_vld3_f16(simde_float16_t const *ptr) { return vld3_f16(ptr); #else simde_float16x4_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) && \ + SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) vfloat16m1x3_t dest = __riscv_vlseg3e16_v_f16m1x3((_Float16 *)&ptr[0], 4); r_[0].sv64 = __riscv_vget_v_f16m1x3_f16m1(dest, 0); r_[1].sv64 = __riscv_vget_v_f16m1x3_f16m1(dest, 1); @@ -83,7 +84,7 @@ simde_vld3_f32(simde_float32 const *ptr) { return vld3_f32(ptr); #else simde_float32x2_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat32m1x3_t dest = __riscv_vlseg3e32_v_f32m1x3(&ptr[0], 2); r_[0].sv64 = __riscv_vget_v_f32m1x3_f32m1(dest, 0); r_[1].sv64 = __riscv_vget_v_f32m1x3_f32m1(dest, 1); @@ -116,7 +117,7 @@ simde_vld3_f64(simde_float64 const *ptr) { return vld3_f64(ptr); #else simde_float64x1_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat64m1x3_t dest = __riscv_vlseg3e64_v_f64m1x3(&ptr[0], 1); r_[0].sv64 = __riscv_vget_v_f64m1x3_f64m1(dest, 0); r_[1].sv64 = __riscv_vget_v_f64m1x3_f64m1(dest, 1); @@ -149,7 +150,7 @@ simde_vld3_s8(int8_t const *ptr) { return vld3_s8(ptr); #else simde_int8x8_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint8m1x3_t dest = __riscv_vlseg3e8_v_i8m1x3(&ptr[0], 8); r_[0].sv64 = __riscv_vget_v_i8m1x3_i8m1(dest, 0); r_[1].sv64 = __riscv_vget_v_i8m1x3_i8m1(dest, 1); @@ -182,7 +183,7 @@ simde_vld3_s16(int16_t const *ptr) { return vld3_s16(ptr); #else simde_int16x4_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) 
vint16m1x3_t dest = __riscv_vlseg3e16_v_i16m1x3(&ptr[0], 4); r_[0].sv64 = __riscv_vget_v_i16m1x3_i16m1(dest, 0); r_[1].sv64 = __riscv_vget_v_i16m1x3_i16m1(dest, 1); @@ -215,7 +216,7 @@ simde_vld3_s32(int32_t const *ptr) { return vld3_s32(ptr); #else simde_int32x2_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint32m1x3_t dest = __riscv_vlseg3e32_v_i32m1x3(&ptr[0], 2); r_[0].sv64 = __riscv_vget_v_i32m1x3_i32m1(dest, 0); r_[1].sv64 = __riscv_vget_v_i32m1x3_i32m1(dest, 1); @@ -248,7 +249,7 @@ simde_vld3_s64(int64_t const *ptr) { return vld3_s64(ptr); #else simde_int64x1_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint64m1x3_t dest = __riscv_vlseg3e64_v_i64m1x3(&ptr[0], 1); r_[0].sv64 = __riscv_vget_v_i64m1x3_i64m1(dest, 0); r_[1].sv64 = __riscv_vget_v_i64m1x3_i64m1(dest, 1); @@ -281,7 +282,7 @@ simde_vld3_u8(uint8_t const *ptr) { return vld3_u8(ptr); #else simde_uint8x8_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x3_t dest = __riscv_vlseg3e8_v_u8m1x3(&ptr[0], 8); r_[0].sv64 = __riscv_vget_v_u8m1x3_u8m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u8m1x3_u8m1(dest, 1); @@ -314,7 +315,7 @@ simde_vld3_u16(uint16_t const *ptr) { return vld3_u16(ptr); #else simde_uint16x4_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x3_t dest = __riscv_vlseg3e16_v_u16m1x3(&ptr[0], 4); r_[0].sv64 = __riscv_vget_v_u16m1x3_u16m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u16m1x3_u16m1(dest, 1); @@ -347,7 +348,7 @@ simde_vld3_u32(uint32_t const *ptr) { return vld3_u32(ptr); #else simde_uint32x2_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint32m1x3_t dest = __riscv_vlseg3e32_v_u32m1x3(&ptr[0], 2); r_[0].sv64 = __riscv_vget_v_u32m1x3_u32m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u32m1x3_u32m1(dest, 1); @@ -380,7 +381,7 @@ simde_vld3_u64(uint64_t const *ptr) { return vld3_u64(ptr); #else simde_uint64x1_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x3_t dest = __riscv_vlseg3e64_v_u64m1x3(&ptr[0], 1); r_[0].sv64 = __riscv_vget_v_u64m1x3_u64m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u64m1x3_u64m1(dest, 1); @@ -413,7 +414,8 @@ simde_vld3q_f16(simde_float16_t const *ptr) { return vld3q_f16(ptr); #else simde_float16x8_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \ + && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) vfloat16m1x3_t dest = __riscv_vlseg3e16_v_f16m1x3((_Float16 *)&ptr[0], 8); r_[0].sv128 = __riscv_vget_v_f16m1x3_f16m1(dest, 0); r_[1].sv128 = __riscv_vget_v_f16m1x3_f16m1(dest, 1); @@ -445,7 +447,7 @@ simde_float32x4x3_t simde_vld3q_f32(simde_float32 const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_f32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_float32x4_private r_[3]; vfloat32m1x3_t dest = __riscv_vlseg3e32_v_f32m1x3(&ptr[0], 4); r_[0].sv128 = __riscv_vget_v_f32m1x3_f32m1(dest, 0); @@ -485,7 +487,7 @@ simde_float64x2x3_t simde_vld3q_f64(simde_float64 const *ptr) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_f64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_float64x2_private r_[3]; vfloat64m1x3_t dest = __riscv_vlseg3e64_v_f64m1x3(&ptr[0], 2); r_[0].sv128 = __riscv_vget_v_f64m1x3_f64m1(dest, 0); @@ -525,7 +527,7 @@ simde_int8x16x3_t simde_vld3q_s8(int8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s8(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int8x16_private r_[3]; vint8m1x3_t dest = __riscv_vlseg3e8_v_i8m1x3(&ptr[0], 16); r_[0].sv128 = __riscv_vget_v_i8m1x3_i8m1(dest, 0); @@ -565,7 +567,7 @@ simde_int16x8x3_t simde_vld3q_s16(int16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s16(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int16x8_private r_[3]; vint16m1x3_t dest = __riscv_vlseg3e16_v_i16m1x3(&ptr[0], 8); r_[0].sv128 = __riscv_vget_v_i16m1x3_i16m1(dest, 0); @@ -605,7 +607,7 @@ simde_int32x4x3_t simde_vld3q_s32(int32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int32x4_private r_[3]; vint32m1x3_t dest = __riscv_vlseg3e32_v_i32m1x3(&ptr[0], 4); r_[0].sv128 = __riscv_vget_v_i32m1x3_i32m1(dest, 0); @@ -645,7 +647,7 @@ simde_int64x2x3_t simde_vld3q_s64(int64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_s64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int64x2_private r_[3]; vint64m1x3_t dest = __riscv_vlseg3e64_v_i64m1x3(&ptr[0], 2); r_[0].sv128 = __riscv_vget_v_i64m1x3_i64m1(dest, 0); @@ -686,7 +688,7 @@ simde_uint8x16x3_t simde_vld3q_u8(uint8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u8(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint8x16_private r_[3]; vuint8m1x3_t dest = __riscv_vlseg3e8_v_u8m1x3(&ptr[0], 16); r_[0].sv128 = __riscv_vget_v_u8m1x3_u8m1(dest, 0); @@ -726,7 +728,7 @@ simde_uint16x8x3_t simde_vld3q_u16(uint16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u16(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint16x8_private r_[3]; vuint16m1x3_t dest = __riscv_vlseg3e16_v_u16m1x3(&ptr[0], 8); r_[0].sv128 = __riscv_vget_v_u16m1x3_u16m1(dest, 0); @@ -766,7 +768,7 @@ simde_uint32x4x3_t simde_vld3q_u32(uint32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u32(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint32x4_private r_[3]; vuint32m1x3_t dest = __riscv_vlseg3e32_v_u32m1x3(&ptr[0], 4); r_[0].sv128 = __riscv_vget_v_u32m1x3_u32m1(dest, 0); @@ -806,7 +808,7 @@ simde_uint64x2x3_t simde_vld3q_u64(uint64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_u64(ptr); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint64x2_private r_[3]; vuint64m1x3_t dest = __riscv_vlseg3e64_v_u64m1x3(&ptr[0], 2); r_[0].sv128 = __riscv_vget_v_u64m1x3_u64m1(dest, 0); @@ -849,7 +851,7 @@ simde_vld3_p8(simde_poly8_t const *ptr) { 
#else simde_poly8x8_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x3_t dest = __riscv_vlseg3e8_v_u8m1x3(&ptr[0], 8); r_[0].sv64 = __riscv_vget_v_u8m1x3_u8m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u8m1x3_u8m1(dest, 1); @@ -884,7 +886,7 @@ simde_vld3_p16(simde_poly16_t const *ptr) { #else simde_poly16x4_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x3_t dest = __riscv_vlseg3e16_v_u16m1x3(&ptr[0], 4); r_[0].sv64 = __riscv_vget_v_u16m1x3_u16m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u16m1x3_u16m1(dest, 1); @@ -919,7 +921,7 @@ simde_vld3_p64(simde_poly64_t const *ptr) { #else simde_poly64x1_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x3_t dest = __riscv_vlseg3e64_v_u64m1x3(&ptr[0], 1); r_[0].sv64 = __riscv_vget_v_u64m1x3_u64m1(dest, 0); r_[1].sv64 = __riscv_vget_v_u64m1x3_u64m1(dest, 1); @@ -954,7 +956,7 @@ simde_vld3q_p8(simde_poly8_t const *ptr) { #else simde_poly8x16_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x3_t dest = __riscv_vlseg3e8_v_u8m1x3(&ptr[0], 16); r_[0].sv128 = __riscv_vget_v_u8m1x3_u8m1(dest, 0); r_[1].sv128 = __riscv_vget_v_u8m1x3_u8m1(dest, 1); @@ -989,7 +991,7 @@ simde_vld3q_p16(simde_poly16_t const *ptr) { #else simde_poly16x8_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x3_t dest = __riscv_vlseg3e16_v_u16m1x3(&ptr[0], 8); r_[0].sv128 = __riscv_vget_v_u16m1x3_u16m1(dest, 0); r_[1].sv128 = __riscv_vget_v_u16m1x3_u16m1(dest, 1); @@ -1024,7 +1026,7 @@ simde_vld3q_p64(simde_poly64_t const *ptr) { #else simde_poly64x2_private r_[3]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x3_t dest = __riscv_vlseg3e64_v_u64m1x3(&ptr[0], 2); r_[0].sv128 = __riscv_vget_v_u64m1x3_u64m1(dest, 0); r_[1].sv128 = __riscv_vget_v_u64m1x3_u64m1(dest, 1); diff --git a/simde/arm/neon/ld4.h b/simde/arm/neon/ld4.h index 5f13ebbd6..e6a9b9487 100644 --- a/simde/arm/neon/ld4.h +++ b/simde/arm/neon/ld4.h @@ -48,7 +48,8 @@ simde_vld4_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { return vld4_f16(ptr); #else simde_float16x4_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \ + && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) vfloat16m1x4_t dest = __riscv_vlseg4e16_v_f16m1x4((_Float16 *)&ptr[0], 4); a_[0].sv64 = __riscv_vget_v_f16m1x4_f16m1(dest, 0); a_[1].sv64 = __riscv_vget_v_f16m1x4_f16m1(dest, 1); @@ -77,7 +78,7 @@ simde_vld4_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { return vld4_f32(ptr); #else simde_float32x2_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat32m1x4_t dest = __riscv_vlseg4e32_v_f32m1x4(&ptr[0], 2); a_[0].sv64 = __riscv_vget_v_f32m1x4_f32m1(dest, 0); a_[1].sv64 = __riscv_vget_v_f32m1x4_f32m1(dest, 1); @@ -105,7 +106,7 @@ simde_vld4_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) { return vld4_f64(ptr); #else simde_float64x1_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && 
defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat64m1x4_t dest = __riscv_vlseg4e64_v_f64m1x4(&ptr[0], 1); a_[0].sv64 = __riscv_vget_v_f64m1x4_f64m1(dest, 0); a_[1].sv64 = __riscv_vget_v_f64m1x4_f64m1(dest, 1); @@ -133,7 +134,7 @@ simde_vld4_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { return vld4_s8(ptr); #else simde_int8x8_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint8m1x4_t dest = __riscv_vlseg4e8_v_i8m1x4(&ptr[0], 8); a_[0].sv64 = __riscv_vget_v_i8m1x4_i8m1(dest, 0); a_[1].sv64 = __riscv_vget_v_i8m1x4_i8m1(dest, 1); @@ -161,7 +162,7 @@ simde_vld4_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { return vld4_s16(ptr); #else simde_int16x4_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint16m1x4_t dest = __riscv_vlseg4e16_v_i16m1x4(&ptr[0], 4); a_[0].sv64 = __riscv_vget_v_i16m1x4_i16m1(dest, 0); a_[1].sv64 = __riscv_vget_v_i16m1x4_i16m1(dest, 1); @@ -189,7 +190,7 @@ simde_vld4_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { return vld4_s32(ptr); #else simde_int32x2_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint32m1x4_t dest = __riscv_vlseg4e32_v_i32m1x4(&ptr[0], 2); a_[0].sv64 = __riscv_vget_v_i32m1x4_i32m1(dest, 0); a_[1].sv64 = __riscv_vget_v_i32m1x4_i32m1(dest, 1); @@ -217,7 +218,7 @@ simde_vld4_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { return vld4_s64(ptr); #else simde_int64x1_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint64m1x4_t dest = __riscv_vlseg4e64_v_i64m1x4(&ptr[0], 1); a_[0].sv64 = __riscv_vget_v_i64m1x4_i64m1(dest, 0); a_[1].sv64 = __riscv_vget_v_i64m1x4_i64m1(dest, 1); @@ -245,7 +246,7 @@ simde_vld4_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { return vld4_u8(ptr); #else simde_uint8x8_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x4_t dest = __riscv_vlseg4e8_v_u8m1x4(&ptr[0], 8); a_[0].sv64 = __riscv_vget_v_u8m1x4_u8m1(dest, 0); a_[1].sv64 = __riscv_vget_v_u8m1x4_u8m1(dest, 1); @@ -273,7 +274,7 @@ simde_vld4_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { return vld4_u16(ptr); #else simde_uint16x4_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x4_t dest = __riscv_vlseg4e16_v_u16m1x4(&ptr[0], 4); a_[0].sv64 = __riscv_vget_v_u16m1x4_u16m1(dest, 0); a_[1].sv64 = __riscv_vget_v_u16m1x4_u16m1(dest, 1); @@ -301,7 +302,7 @@ simde_vld4_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { return vld4_u32(ptr); #else simde_uint32x2_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint32m1x4_t dest = __riscv_vlseg4e32_v_u32m1x4(&ptr[0], 2); a_[0].sv64 = __riscv_vget_v_u32m1x4_u32m1(dest, 0); a_[1].sv64 = __riscv_vget_v_u32m1x4_u32m1(dest, 1); @@ -329,7 +330,7 @@ simde_vld4_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { return vld4_u64(ptr); #else simde_uint64x1_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x4_t dest = __riscv_vlseg4e64_v_u64m1x4(&ptr[0], 1); a_[0].sv64 = __riscv_vget_v_u64m1x4_u64m1(dest, 0); a_[1].sv64 = __riscv_vget_v_u64m1x4_u64m1(dest, 1); @@ -357,7 +358,8 @@ simde_vld4q_f16(simde_float16_t const 
ptr[HEDLEY_ARRAY_PARAM(32)]) { return vld4q_f16(ptr); #else simde_float16x8_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \ + && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) vfloat16m1x4_t dest = __riscv_vlseg4e16_v_f16m1x4((_Float16 *)&ptr[0], 8); a_[0].sv128 = __riscv_vget_v_f16m1x4_f16m1(dest, 0); a_[1].sv128 = __riscv_vget_v_f16m1x4_f16m1(dest, 1); @@ -386,7 +388,7 @@ simde_vld4q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) { return vld4q_f32(ptr); #else simde_float32x4_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat32m1x4_t dest = __riscv_vlseg4e32_v_f32m1x4(&ptr[0], 4); a_[0].sv128 = __riscv_vget_v_f32m1x4_f32m1(dest, 0); a_[1].sv128 = __riscv_vget_v_f32m1x4_f32m1(dest, 1); @@ -414,7 +416,7 @@ simde_vld4q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(8)]) { return vld4q_f64(ptr); #else simde_float64x2_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat64m1x4_t dest = __riscv_vlseg4e64_v_f64m1x4(&ptr[0], 2); a_[0].sv128 = __riscv_vget_v_f64m1x4_f64m1(dest, 0); a_[1].sv128 = __riscv_vget_v_f64m1x4_f64m1(dest, 1); @@ -442,7 +444,7 @@ simde_vld4q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { return vld4q_s8(ptr); #else simde_int8x16_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint8m1x4_t dest = __riscv_vlseg4e8_v_i8m1x4(&ptr[0], 16); a_[0].sv128 = __riscv_vget_v_i8m1x4_i8m1(dest, 0); a_[1].sv128 = __riscv_vget_v_i8m1x4_i8m1(dest, 1); @@ -470,7 +472,7 @@ simde_vld4q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { return vld4q_s16(ptr); #else simde_int16x8_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint16m1x4_t dest = __riscv_vlseg4e16_v_i16m1x4(&ptr[0], 8); a_[0].sv128 = __riscv_vget_v_i16m1x4_i16m1(dest, 0); a_[1].sv128 = __riscv_vget_v_i16m1x4_i16m1(dest, 1); @@ -498,7 +500,7 @@ simde_vld4q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { return vld4q_s32(ptr); #else simde_int32x4_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint32m1x4_t dest = __riscv_vlseg4e32_v_i32m1x4(&ptr[0], 4); a_[0].sv128 = __riscv_vget_v_i32m1x4_i32m1(dest, 0); a_[1].sv128 = __riscv_vget_v_i32m1x4_i32m1(dest, 1); @@ -526,7 +528,7 @@ simde_vld4q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { return vld4q_s64(ptr); #else simde_int64x2_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint64m1x4_t dest = __riscv_vlseg4e64_v_i64m1x4(&ptr[0], 2); a_[0].sv128 = __riscv_vget_v_i64m1x4_i64m1(dest, 0); a_[1].sv128 = __riscv_vget_v_i64m1x4_i64m1(dest, 1); @@ -590,7 +592,7 @@ simde_vld4q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { simde_uint8x16_from_private(r_[2]), simde_uint8x16_from_private(r_[3])}}; return s_; - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint8x16_private r_[4]; vuint8m1x4_t dest = __riscv_vlseg4e8_v_u8m1x4(&ptr[0], 16); r_[0].sv128 = __riscv_vget_v_u8m1x4_u8m1(dest, 0); @@ -626,7 +628,7 @@ simde_vld4q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { return vld4q_u16(ptr); #else 
simde_uint16x8_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x4_t dest = __riscv_vlseg4e16_v_u16m1x4(&ptr[0], 8); a_[0].sv128 = __riscv_vget_v_u16m1x4_u16m1(dest, 0); a_[1].sv128 = __riscv_vget_v_u16m1x4_u16m1(dest, 1); @@ -654,7 +656,7 @@ simde_vld4q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { return vld4q_u32(ptr); #else simde_uint32x4_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint32m1x4_t dest = __riscv_vlseg4e32_v_u32m1x4(&ptr[0], 4); a_[0].sv128 = __riscv_vget_v_u32m1x4_u32m1(dest, 0); a_[1].sv128 = __riscv_vget_v_u32m1x4_u32m1(dest, 1); @@ -682,7 +684,7 @@ simde_vld4q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { return vld4q_u64(ptr); #else simde_uint64x2_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x4_t dest = __riscv_vlseg4e64_v_u64m1x4(&ptr[0], 2); a_[0].sv128 = __riscv_vget_v_u64m1x4_u64m1(dest, 0); a_[1].sv128 = __riscv_vget_v_u64m1x4_u64m1(dest, 1); @@ -710,7 +712,7 @@ simde_vld4_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { return vld4_p8(ptr); #else simde_poly8x8_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x4_t dest = __riscv_vlseg4e8_v_u8m1x4(&ptr[0], 8); a_[0].sv64 = __riscv_vget_v_u8m1x4_u8m1(dest, 0); a_[1].sv64 = __riscv_vget_v_u8m1x4_u8m1(dest, 1); @@ -738,7 +740,7 @@ simde_vld4_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { return vld4_p16(ptr); #else simde_poly16x4_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x4_t dest = __riscv_vlseg4e16_v_u16m1x4(&ptr[0], 4); a_[0].sv64 = __riscv_vget_v_u16m1x4_u16m1(dest, 0); a_[1].sv64 = __riscv_vget_v_u16m1x4_u16m1(dest, 1); @@ -766,7 +768,7 @@ simde_vld4_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { return vld4_p64(ptr); #else simde_poly64x1_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x4_t dest = __riscv_vlseg4e64_v_u64m1x4(&ptr[0], 1); a_[0].sv64 = __riscv_vget_v_u64m1x4_u64m1(dest, 0); a_[1].sv64 = __riscv_vget_v_u64m1x4_u64m1(dest, 1); @@ -794,7 +796,7 @@ simde_vld4q_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { return vld4q_p8(ptr); #else simde_poly8x16_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x4_t dest = __riscv_vlseg4e8_v_u8m1x4(&ptr[0], 16); a_[0].sv128 = __riscv_vget_v_u8m1x4_u8m1(dest, 0); a_[1].sv128 = __riscv_vget_v_u8m1x4_u8m1(dest, 1); @@ -822,7 +824,7 @@ simde_vld4q_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { return vld4q_p16(ptr); #else simde_poly16x8_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x4_t dest = __riscv_vlseg4e16_v_u16m1x4(&ptr[0], 8); a_[0].sv128 = __riscv_vget_v_u16m1x4_u16m1(dest, 0); a_[1].sv128 = __riscv_vget_v_u16m1x4_u16m1(dest, 1); @@ -850,7 +852,7 @@ simde_vld4q_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { return vld4q_p64(ptr); #else simde_poly64x2_private a_[4]; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x4_t dest = __riscv_vlseg4e64_v_u64m1x4(&ptr[0], 2); 
a_[0].sv128 = __riscv_vget_v_u64m1x4_u64m1(dest, 0); a_[1].sv128 = __riscv_vget_v_u64m1x4_u64m1(dest, 1); diff --git a/simde/arm/neon/st2.h b/simde/arm/neon/st2.h index d5bdc7ccb..20dc145a9 100644 --- a/simde/arm/neon/st2.h +++ b/simde/arm/neon/st2.h @@ -48,7 +48,8 @@ simde_vst2_f16(simde_float16_t *ptr, simde_float16x4x2_t val) { #else simde_float16x4_private a_[2] = {simde_float16x4_to_private(val.val[0]), simde_float16x4_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \ + && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) vfloat16m1x2_t dest = __riscv_vlseg2e16_v_f16m1x2((_Float16 *)ptr, 4); dest = __riscv_vset_v_f16m1_f16m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_f16m1_f16m1x2 (dest, 1, a_[1].sv64); @@ -76,7 +77,7 @@ simde_vst2_f32(simde_float32_t *ptr, simde_float32x2x2_t val) { #else simde_float32x2_private a_[2] = {simde_float32x2_to_private(val.val[0]), simde_float32x2_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat32m1x2_t dest = __riscv_vlseg2e32_v_f32m1x2(ptr, 2); dest = __riscv_vset_v_f32m1_f32m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_f32m1_f32m1x2 (dest, 1, a_[1].sv64); @@ -103,7 +104,7 @@ simde_vst2_f64(simde_float64_t *ptr, simde_float64x1x2_t val) { #else simde_float64x1_private a_[2] = {simde_float64x1_to_private(val.val[0]), simde_float64x1_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat64m1x2_t dest = __riscv_vlseg2e64_v_f64m1x2(ptr, 1); dest = __riscv_vset_v_f64m1_f64m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_f64m1_f64m1x2 (dest, 1, a_[1].sv64); @@ -130,7 +131,7 @@ simde_vst2_s8(int8_t *ptr, simde_int8x8x2_t val) { #else simde_int8x8_private a_[2] = {simde_int8x8_to_private(val.val[0]), simde_int8x8_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint8m1x2_t dest = __riscv_vlseg2e8_v_i8m1x2(ptr, 8); dest = __riscv_vset_v_i8m1_i8m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_i8m1_i8m1x2 (dest, 1, a_[1].sv64); @@ -157,7 +158,7 @@ simde_vst2_s16(int16_t *ptr, simde_int16x4x2_t val) { #else simde_int16x4_private a_[2] = {simde_int16x4_to_private(val.val[0]), simde_int16x4_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint16m1x2_t dest = __riscv_vlseg2e16_v_i16m1x2(ptr, 4); dest = __riscv_vset_v_i16m1_i16m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_i16m1_i16m1x2 (dest, 1, a_[1].sv64); @@ -184,7 +185,7 @@ simde_vst2_s32(int32_t *ptr, simde_int32x2x2_t val) { #else simde_int32x2_private a_[2] = {simde_int32x2_to_private(val.val[0]), simde_int32x2_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint32m1x2_t dest = __riscv_vlseg2e32_v_i32m1x2(ptr, 2); dest = __riscv_vset_v_i32m1_i32m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_i32m1_i32m1x2 (dest, 1, a_[1].sv64); @@ -211,7 +212,7 @@ simde_vst2_s64(int64_t *ptr, simde_int64x1x2_t val) { #else simde_int64x1_private a_[2] = {simde_int64x1_to_private(val.val[0]), simde_int64x1_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && 
defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint64m1x2_t dest = __riscv_vlseg2e64_v_i64m1x2(ptr, 1); dest = __riscv_vset_v_i64m1_i64m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_i64m1_i64m1x2 (dest, 1, a_[1].sv64); @@ -254,7 +255,7 @@ simde_vst2_u8(uint8_t *ptr, simde_uint8x8x2_t val) { #else simde_uint8x8_private a_[2] = {simde_uint8x8_to_private(val.val[0]), simde_uint8x8_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(ptr, 8); dest = __riscv_vset_v_u8m1_u8m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_u8m1_u8m1x2 (dest, 1, a_[1].sv64); @@ -281,7 +282,7 @@ simde_vst2_u16(uint16_t *ptr, simde_uint16x4x2_t val) { #else simde_uint16x4_private a_[2] = {simde_uint16x4_to_private(val.val[0]), simde_uint16x4_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(ptr, 4); dest = __riscv_vset_v_u16m1_u16m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_u16m1_u16m1x2 (dest, 1, a_[1].sv64); @@ -308,7 +309,7 @@ simde_vst2_u32(uint32_t *ptr, simde_uint32x2x2_t val) { #else simde_uint32x2_private a_[2] = {simde_uint32x2_to_private(val.val[0]), simde_uint32x2_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint32m1x2_t dest = __riscv_vlseg2e32_v_u32m1x2(ptr, 2); dest = __riscv_vset_v_u32m1_u32m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_u32m1_u32m1x2 (dest, 1, a_[1].sv64); @@ -335,7 +336,7 @@ simde_vst2_u64(uint64_t *ptr, simde_uint64x1x2_t val) { #else simde_uint64x1_private a_[2] = {simde_uint64x1_to_private(val.val[0]), simde_uint64x1_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(ptr, 1); dest = __riscv_vset_v_u64m1_u64m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_u64m1_u64m1x2 (dest, 1, a_[1].sv64); @@ -359,7 +360,8 @@ void simde_vst2q_f16(simde_float16_t *ptr, simde_float16x8x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) vst2q_f16(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \ + && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) simde_float16x8_private a_[2] = {simde_float16x8_to_private(val.val[0]), simde_float16x8_to_private(val.val[1])}; vfloat16m1x2_t dest = __riscv_vlseg2e16_v_f16m1x2((_Float16 *)ptr, 8); @@ -383,7 +385,7 @@ void simde_vst2q_f32(simde_float32_t *ptr, simde_float32x4x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst2q_f32(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_float32x4_private a_[2] = {simde_float32x4_to_private(val.val[0]), simde_float32x4_to_private(val.val[1])}; vfloat32m1x2_t dest = __riscv_vlseg2e32_v_f32m1x2(ptr, 4); @@ -409,7 +411,7 @@ simde_vst2q_f64(simde_float64_t *ptr, simde_float64x2x2_t val) { #else simde_float64x2_private a_[2] = {simde_float64x2_to_private(val.val[0]), simde_float64x2_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat64m1x2_t dest = __riscv_vlseg2e64_v_f64m1x2(ptr, 2); dest = 
__riscv_vset_v_f64m1_f64m1x2 (dest, 0, a_[0].sv128); dest = __riscv_vset_v_f64m1_f64m1x2 (dest, 1, a_[1].sv128); @@ -433,7 +435,7 @@ void simde_vst2q_s8(int8_t *ptr, simde_int8x16x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst2q_s8(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int8x16_private a_[2] = {simde_int8x16_to_private(val.val[0]), simde_int8x16_to_private(val.val[1])}; vint8m1x2_t dest = __riscv_vlseg2e8_v_i8m1x2(ptr, 16); @@ -456,7 +458,7 @@ void simde_vst2q_s16(int16_t *ptr, simde_int16x8x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst2q_s16(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int16x8_private a_[2] = {simde_int16x8_to_private(val.val[0]), simde_int16x8_to_private(val.val[1])}; vint16m1x2_t dest = __riscv_vlseg2e16_v_i16m1x2(ptr, 8); @@ -479,7 +481,7 @@ void simde_vst2q_s32(int32_t *ptr, simde_int32x4x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst2q_s32(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int32x4_private a_[2] = {simde_int32x4_to_private(val.val[0]), simde_int32x4_to_private(val.val[1])}; vint32m1x2_t dest = __riscv_vlseg2e32_v_i32m1x2(ptr, 4); @@ -502,7 +504,7 @@ void simde_vst2q_s64(int64_t *ptr, simde_int64x2x2_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst2q_s64(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_int64x2_private a_[2] = {simde_int64x2_to_private(val.val[0]), simde_int64x2_to_private(val.val[1])}; vint64m1x2_t dest = __riscv_vlseg2e64_v_i64m1x2(ptr, 2); @@ -529,7 +531,7 @@ void simde_vst2q_u8(uint8_t *ptr, simde_uint8x16x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst2q_u8(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint8x16_private a_[2] = {simde_uint8x16_to_private(val.val[0]), simde_uint8x16_to_private(val.val[1])}; vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(ptr, 16); @@ -552,7 +554,7 @@ void simde_vst2q_u16(uint16_t *ptr, simde_uint16x8x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst2q_u16(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint16x8_private a_[2] = {simde_uint16x8_to_private(val.val[0]), simde_uint16x8_to_private(val.val[1])}; vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(ptr, 8); @@ -575,7 +577,7 @@ void simde_vst2q_u32(uint32_t *ptr, simde_uint32x4x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst2q_u32(ptr, val); - #elif defined(SIMDE_RISCV_V_NATIVE) + #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) simde_uint32x4_private a_[2] = {simde_uint32x4_to_private(val.val[0]), simde_uint32x4_to_private(val.val[1])}; vuint32m1x2_t dest = __riscv_vlseg2e32_v_u32m1x2(ptr, 4); @@ -601,7 +603,7 @@ simde_vst2q_u64(uint64_t *ptr, simde_uint64x2x2_t val) { #else simde_uint64x2_private a_[2] = {simde_uint64x2_to_private(val.val[0]), simde_uint64x2_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(ptr, 2); dest = __riscv_vset_v_u64m1_u64m1x2 (dest, 0, a_[0].sv128); dest = __riscv_vset_v_u64m1_u64m1x2 (dest, 1, a_[1].sv128); @@ -628,7 
+630,7 @@ simde_vst2_p8(simde_poly8_t *ptr, simde_poly8x8x2_t val) { #else simde_poly8x8_private a_[2] = {simde_poly8x8_to_private(val.val[0]), simde_poly8x8_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(ptr, 8); dest = __riscv_vset_v_u8m1_u8m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_u8m1_u8m1x2 (dest, 1, a_[1].sv64); @@ -655,7 +657,7 @@ simde_vst2_p16(simde_poly16_t *ptr, simde_poly16x4x2_t val) { #else simde_poly16x4_private a_[2] = {simde_poly16x4_to_private(val.val[0]), simde_poly16x4_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(ptr, 4); dest = __riscv_vset_v_u16m1_u16m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_u16m1_u16m1x2 (dest, 1, a_[1].sv64); @@ -682,7 +684,7 @@ simde_vst2_p64(simde_poly64_t *ptr, simde_poly64x1x2_t val) { #else simde_poly64x1_private a_[2] = {simde_poly64x1_to_private(val.val[0]), simde_poly64x1_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(ptr, 1); dest = __riscv_vset_v_u64m1_u64m1x2 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_u64m1_u64m1x2 (dest, 1, a_[1].sv64); @@ -709,7 +711,7 @@ simde_vst2q_p8(simde_poly8_t *ptr, simde_poly8x16x2_t val) { #else simde_poly8x16_private a_[2] = {simde_poly8x16_to_private(val.val[0]), simde_poly8x16_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(ptr, 16); dest = __riscv_vset_v_u8m1_u8m1x2 (dest, 0, a_[0].sv128); dest = __riscv_vset_v_u8m1_u8m1x2 (dest, 1, a_[1].sv128); @@ -736,7 +738,7 @@ simde_vst2q_p16(simde_poly16_t *ptr, simde_poly16x8x2_t val) { #else simde_poly16x8_private a_[2] = {simde_poly16x8_to_private(val.val[0]), simde_poly16x8_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(ptr, 8); dest = __riscv_vset_v_u16m1_u16m1x2 (dest, 0, a_[0].sv128); dest = __riscv_vset_v_u16m1_u16m1x2 (dest, 1, a_[1].sv128); @@ -763,7 +765,7 @@ simde_vst2q_p64(simde_poly64_t *ptr, simde_poly64x2x2_t val) { #else simde_poly64x2_private a_[2] = {simde_poly64x2_to_private(val.val[0]), simde_poly64x2_to_private(val.val[1])}; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(ptr, 2); dest = __riscv_vset_v_u64m1_u64m1x2 (dest, 0, a_[0].sv128); dest = __riscv_vset_v_u64m1_u64m1x2 (dest, 1, a_[1].sv128); diff --git a/simde/arm/neon/st3.h b/simde/arm/neon/st3.h index 6095fff2c..8849fa130 100644 --- a/simde/arm/neon/st3.h +++ b/simde/arm/neon/st3.h @@ -48,7 +48,8 @@ simde_vst3_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float16x4x3_t simde_float16x4_private a[3] = { simde_float16x4_to_private(val.val[0]), simde_float16x4_to_private(val.val[1]), simde_float16x4_to_private(val.val[2]) }; - #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \ + && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128) vfloat16m1x3_t dest = 
__riscv_vlseg3e16_v_f16m1x3((_Float16 *)ptr, 4); dest = __riscv_vset_v_f16m1_f16m1x3 (dest, 0, a[0].sv64); dest = __riscv_vset_v_f16m1_f16m1x3 (dest, 1, a[1].sv64); @@ -78,7 +79,7 @@ simde_vst3_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_float32x2x3_t v simde_float32x2_private a[3] = { simde_float32x2_to_private(val.val[0]), simde_float32x2_to_private(val.val[1]), simde_float32x2_to_private(val.val[2]) }; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat32m1x3_t dest = __riscv_vlseg3e32_v_f32m1x3(ptr, 2); dest = __riscv_vset_v_f32m1_f32m1x3 (dest, 0, a[0].sv64); dest = __riscv_vset_v_f32m1_f32m1x3 (dest, 1, a[1].sv64); @@ -114,7 +115,7 @@ simde_vst3_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x1x3_t v simde_float64x1_private a_[3] = { simde_float64x1_to_private(val.val[0]), simde_float64x1_to_private(val.val[1]), simde_float64x1_to_private(val.val[2]) }; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vfloat64m1x3_t dest = __riscv_vlseg3e64_v_f64m1x3(ptr, 1); dest = __riscv_vset_v_f64m1_f64m1x3 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_f64m1_f64m1x3 (dest, 1, a_[1].sv64); @@ -141,7 +142,7 @@ simde_vst3_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_int8x8x3_t val) { simde_int8x8_private a_[3] = { simde_int8x8_to_private(val.val[0]), simde_int8x8_to_private(val.val[1]), simde_int8x8_to_private(val.val[2]) }; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint8m1x3_t dest = __riscv_vlseg3e8_v_i8m1x3(ptr, 8); dest = __riscv_vset_v_i8m1_i8m1x3 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_i8m1_i8m1x3 (dest, 1, a_[1].sv64); @@ -188,7 +189,7 @@ simde_vst3_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_int16x4x3_t val) { simde_int16x4_private a_[3] = { simde_int16x4_to_private(val.val[0]), simde_int16x4_to_private(val.val[1]), simde_int16x4_to_private(val.val[2]) }; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint16m1x3_t dest = __riscv_vlseg3e16_v_i16m1x3(ptr, 4); dest = __riscv_vset_v_i16m1_i16m1x3 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_i16m1_i16m1x3 (dest, 1, a_[1].sv64); @@ -235,7 +236,7 @@ simde_vst3_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_int32x2x3_t val) { simde_int32x2_private a[3] = { simde_int32x2_to_private(val.val[0]), simde_int32x2_to_private(val.val[1]), simde_int32x2_to_private(val.val[2]) }; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint32m1x3_t dest = __riscv_vlseg3e32_v_i32m1x3(ptr, 2); dest = __riscv_vset_v_i32m1_i32m1x3 (dest, 0, a[0].sv64); dest = __riscv_vset_v_i32m1_i32m1x3 (dest, 1, a[1].sv64); @@ -271,7 +272,7 @@ simde_vst3_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x1x3_t val) { simde_int64x1_private a_[3] = { simde_int64x1_to_private(val.val[0]), simde_int64x1_to_private(val.val[1]), simde_int64x1_to_private(val.val[2]) }; - #if defined(SIMDE_RISCV_V_NATIVE) + #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) vint64m1x3_t dest = __riscv_vlseg3e64_v_i64m1x3(ptr, 1); dest = __riscv_vset_v_i64m1_i64m1x3 (dest, 0, a_[0].sv64); dest = __riscv_vset_v_i64m1_i64m1x3 (dest, 1, a_[1].sv64); @@ -298,7 +299,7 @@ simde_vst3_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_uint8x8x3_t val) { simde_uint8x8_private a_[3] = { simde_uint8x8_to_private(val.val[0]), simde_uint8x8_to_private(val.val[1]), 
                                     simde_uint8x8_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint8m1x3_t dest = __riscv_vlseg3e8_v_u8m1x3(ptr, 8);
      dest = __riscv_vset_v_u8m1_u8m1x3 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u8m1_u8m1x3 (dest, 1, a_[1].sv64);
@@ -345,7 +346,7 @@ simde_vst3_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_uint16x4x3_t val) {
     simde_uint16x4_private a_[3] = { simde_uint16x4_to_private(val.val[0]),
                                      simde_uint16x4_to_private(val.val[1]),
                                      simde_uint16x4_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint16m1x3_t dest = __riscv_vlseg3e16_v_u16m1x3(ptr, 4);
      dest = __riscv_vset_v_u16m1_u16m1x3 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u16m1_u16m1x3 (dest, 1, a_[1].sv64);
@@ -392,7 +393,7 @@ simde_vst3_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_uint32x2x3_t val) {
     simde_uint32x2_private a[3] = { simde_uint32x2_to_private(val.val[0]),
                                     simde_uint32x2_to_private(val.val[1]),
                                     simde_uint32x2_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint32m1x3_t dest = __riscv_vlseg3e32_v_u32m1x3(ptr, 2);
      dest = __riscv_vset_v_u32m1_u32m1x3 (dest, 0, a[0].sv64);
      dest = __riscv_vset_v_u32m1_u32m1x3 (dest, 1, a[1].sv64);
@@ -428,7 +429,7 @@ simde_vst3_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t val) {
     simde_uint64x1_private a_[3] = { simde_uint64x1_to_private(val.val[0]),
                                      simde_uint64x1_to_private(val.val[1]),
                                      simde_uint64x1_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint64m1x3_t dest = __riscv_vlseg3e64_v_u64m1x3(ptr, 1);
      dest = __riscv_vset_v_u64m1_u64m1x3 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u64m1_u64m1x3 (dest, 1, a_[1].sv64);
@@ -455,7 +456,8 @@ simde_vst3q_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_float16x8x3_t
     simde_float16x8_private a_[3] = { simde_float16x8_to_private(val.val[0]),
                                       simde_float16x8_to_private(val.val[1]),
                                       simde_float16x8_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \
+        && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
      vfloat16m1x3_t dest = __riscv_vlseg3e16_v_f16m1x3((_Float16 *)ptr, 8);
      dest = __riscv_vset_v_f16m1_f16m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_f16m1_f16m1x3 (dest, 1, a_[1].sv128);
@@ -485,7 +487,7 @@ simde_vst3q_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float32x4x3_t
     simde_float32x4_private a_[3] = { simde_float32x4_to_private(val.val[0]),
                                       simde_float32x4_to_private(val.val[1]),
                                       simde_float32x4_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vfloat32m1x3_t dest = __riscv_vlseg3e32_v_f32m1x3(ptr, 4);
      dest = __riscv_vset_v_f32m1_f32m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_f32m1_f32m1x3 (dest, 1, a_[1].sv128);
@@ -532,7 +534,7 @@ simde_vst3q_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_float64x2x3_t
     simde_float64x2_private a[3] = { simde_float64x2_to_private(val.val[0]),
                                      simde_float64x2_to_private(val.val[1]),
                                      simde_float64x2_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vfloat64m1x3_t dest = __riscv_vlseg3e64_v_f64m1x3(ptr, 2);
      dest = __riscv_vset_v_f64m1_f64m1x3 (dest, 0, a[0].sv128);
      dest = __riscv_vset_v_f64m1_f64m1x3 (dest, 1, a[1].sv128);
@@ -568,7 +570,7 @@ simde_vst3q_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_int8x16x3_t val) {
     simde_int8x16_private a_[3] = { simde_int8x16_to_private(val.val[0]),
                                     simde_int8x16_to_private(val.val[1]),
                                     simde_int8x16_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint8m1x3_t dest = __riscv_vlseg3e8_v_i8m1x3(ptr, 16);
      dest = __riscv_vset_v_i8m1_i8m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_i8m1_i8m1x3 (dest, 1, a_[1].sv128);
@@ -620,7 +622,7 @@ simde_vst3q_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_int16x8x3_t val) {
     simde_int16x8_private a_[3] = { simde_int16x8_to_private(val.val[0]),
                                     simde_int16x8_to_private(val.val[1]),
                                     simde_int16x8_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint16m1x3_t dest = __riscv_vlseg3e16_v_i16m1x3(ptr, 8);
      dest = __riscv_vset_v_i16m1_i16m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_i16m1_i16m1x3 (dest, 1, a_[1].sv128);
@@ -667,7 +669,7 @@ simde_vst3q_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_int32x4x3_t val) {
     simde_int32x4_private a_[3] = { simde_int32x4_to_private(val.val[0]),
                                     simde_int32x4_to_private(val.val[1]),
                                     simde_int32x4_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint32m1x3_t dest = __riscv_vlseg3e32_v_i32m1x3(ptr, 4);
      dest = __riscv_vset_v_i32m1_i32m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_i32m1_i32m1x3 (dest, 1, a_[1].sv128);
@@ -714,7 +716,7 @@ simde_vst3q_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_int64x2x3_t val) {
     simde_int64x2_private a[3] = { simde_int64x2_to_private(val.val[0]),
                                    simde_int64x2_to_private(val.val[1]),
                                    simde_int64x2_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint64m1x3_t dest = __riscv_vlseg3e64_v_i64m1x3(ptr, 2);
      dest = __riscv_vset_v_i64m1_i64m1x3 (dest, 0, a[0].sv128);
      dest = __riscv_vset_v_i64m1_i64m1x3 (dest, 1, a[1].sv128);
@@ -782,7 +784,7 @@ simde_vst3q_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_uint8x16x3_t val) {
      v128_t m2 = wasm_i8x16_shuffle(r2, r1, 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15);
      wasm_v128_store(ptr + 32, m2);
-    #elif defined(SIMDE_RISCV_V_NATIVE)
+    #elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint8m1x3_t dest = __riscv_vlseg3e8_v_u8m1x3(ptr, 16);
      dest = __riscv_vset_v_u8m1_u8m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u8m1_u8m1x3 (dest, 1, a_[1].sv128);
@@ -835,7 +837,7 @@ simde_vst3q_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_uint16x8x3_t val) {
                                      simde_uint16x8_to_private(val.val[1]),
                                      simde_uint16x8_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint16m1x3_t dest = __riscv_vlseg3e16_v_u16m1x3(ptr, 8);
      dest = __riscv_vset_v_u16m1_u16m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u16m1_u16m1x3 (dest, 1, a_[1].sv128);
@@ -883,7 +885,7 @@ simde_vst3q_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_uint32x4x3_t val) {
                                      simde_uint32x4_to_private(val.val[1]),
                                      simde_uint32x4_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint32m1x3_t dest = __riscv_vlseg3e32_v_u32m1x3(ptr, 4);
      dest = __riscv_vset_v_u32m1_u32m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u32m1_u32m1x3 (dest, 1, a_[1].sv128);
@@ -930,7 +932,7 @@ simde_vst3q_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_uint64x2x3_t val) {
     simde_uint64x2_private a[3] = { simde_uint64x2_to_private(val.val[0]),
                                     simde_uint64x2_to_private(val.val[1]),
                                     simde_uint64x2_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint64m1x3_t dest = __riscv_vlseg3e64_v_u64m1x3(ptr, 2);
      dest = __riscv_vset_v_u64m1_u64m1x3 (dest, 0, a[0].sv128);
      dest = __riscv_vset_v_u64m1_u64m1x3 (dest, 1, a[1].sv128);
@@ -966,7 +968,7 @@ simde_vst3_p8(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_poly8x8x3_t val)
     simde_poly8x8_private a_[3] = { simde_poly8x8_to_private(val.val[0]),
                                     simde_poly8x8_to_private(val.val[1]),
                                     simde_poly8x8_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint8m1x3_t dest = __riscv_vlseg3e8_v_u8m1x3(ptr, 8);
      dest = __riscv_vset_v_u8m1_u8m1x3 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u8m1_u8m1x3 (dest, 1, a_[1].sv64);
@@ -995,7 +997,7 @@ simde_vst3_p16(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_poly16x4x3_t va
     simde_poly16x4_private a_[3] = { simde_poly16x4_to_private(val.val[0]),
                                      simde_poly16x4_to_private(val.val[1]),
                                      simde_poly16x4_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint16m1x3_t dest = __riscv_vlseg3e16_v_u16m1x3(ptr, 4);
      dest = __riscv_vset_v_u16m1_u16m1x3 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u16m1_u16m1x3 (dest, 1, a_[1].sv64);
@@ -1024,7 +1026,7 @@ simde_vst3_p64(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_poly64x1x3_t val
     simde_poly64x1_private a_[3] = { simde_poly64x1_to_private(val.val[0]),
                                      simde_poly64x1_to_private(val.val[1]),
                                      simde_poly64x1_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint64m1x3_t dest = __riscv_vlseg3e64_v_u64m1x3(ptr, 1);
      dest = __riscv_vset_v_u64m1_u64m1x3 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u64m1_u64m1x3 (dest, 1, a_[1].sv64);
@@ -1051,7 +1053,7 @@ simde_vst3q_p8(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_poly8x16x3_t val
     simde_poly8x16_private a_[3] = {simde_poly8x16_to_private(val.val[0]),
                                     simde_poly8x16_to_private(val.val[1]),
                                     simde_poly8x16_to_private(val.val[2])};
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint8m1x3_t dest = __riscv_vlseg3e8_v_u8m1x3(ptr, 16);
      dest = __riscv_vset_v_u8m1_u8m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u8m1_u8m1x3 (dest, 1, a_[1].sv128);
@@ -1081,7 +1083,7 @@ simde_vst3q_p16(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_poly16x8x3_t v
                                      simde_poly16x8_to_private(val.val[1]),
                                      simde_poly16x8_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint16m1x3_t dest = __riscv_vlseg3e16_v_u16m1x3(ptr, 8);
      dest = __riscv_vset_v_u16m1_u16m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u16m1_u16m1x3 (dest, 1, a_[1].sv128);
@@ -1110,7 +1112,7 @@ simde_vst3q_p64(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_poly64x2x3_t va
     simde_poly64x2_private a_[3] = { simde_poly64x2_to_private(val.val[0]),
                                      simde_poly64x2_to_private(val.val[1]),
                                      simde_poly64x2_to_private(val.val[2]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint64m1x3_t dest = __riscv_vlseg3e64_v_u64m1x3(ptr, 2);
      dest = __riscv_vset_v_u64m1_u64m1x3 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u64m1_u64m1x3 (dest, 1, a_[1].sv128);
diff --git a/simde/arm/neon/st4.h b/simde/arm/neon/st4.h
index 475f745a7..7a95b62ae 100644
--- a/simde/arm/neon/st4.h
+++ b/simde/arm/neon/st4.h
@@ -46,7 +46,8 @@ simde_vst4_f16(simde_float16_t *ptr, simde_float16x4x4_t val) {
   #else
     simde_float16x4_private a_[4] = { simde_float16x4_to_private(val.val[0]), simde_float16x4_to_private(val.val[1]),
                                       simde_float16x4_to_private(val.val[2]), simde_float16x4_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \
+        && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
      vfloat16m1x4_t dest = __riscv_vlseg4e16_v_f16m1x4((_Float16 *)ptr, 4);
      dest = __riscv_vset_v_f16m1_f16m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_f16m1_f16m1x4 (dest, 1, a_[1].sv64);
@@ -76,7 +77,7 @@ simde_vst4_f32(simde_float32_t *ptr, simde_float32x2x4_t val) {
   #else
     simde_float32x2_private a_[4] = { simde_float32x2_to_private(val.val[0]), simde_float32x2_to_private(val.val[1]),
                                       simde_float32x2_to_private(val.val[2]), simde_float32x2_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vfloat32m1x4_t dest = __riscv_vlseg4e32_v_f32m1x4(ptr, 2);
      dest = __riscv_vset_v_f32m1_f32m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_f32m1_f32m1x4 (dest, 1, a_[1].sv64);
@@ -105,7 +106,7 @@ simde_vst4_f64(simde_float64_t *ptr, simde_float64x1x4_t val) {
   #else
     simde_float64x1_private a_[4] = { simde_float64x1_to_private(val.val[0]), simde_float64x1_to_private(val.val[1]),
                                       simde_float64x1_to_private(val.val[2]), simde_float64x1_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vfloat64m1x4_t dest = __riscv_vlseg4e64_v_f64m1x4(ptr, 1);
      dest = __riscv_vset_v_f64m1_f64m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_f64m1_f64m1x4 (dest, 1, a_[1].sv64);
@@ -134,7 +135,7 @@ simde_vst4_s8(int8_t *ptr, simde_int8x8x4_t val) {
   #else
     simde_int8x8_private a_[4] = { simde_int8x8_to_private(val.val[0]), simde_int8x8_to_private(val.val[1]),
                                    simde_int8x8_to_private(val.val[2]), simde_int8x8_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint8m1x4_t dest = __riscv_vlseg4e8_v_i8m1x4(ptr, 8);
      dest = __riscv_vset_v_i8m1_i8m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_i8m1_i8m1x4 (dest, 1, a_[1].sv64);
@@ -163,7 +164,7 @@ simde_vst4_s16(int16_t *ptr, simde_int16x4x4_t val) {
   #else
     simde_int16x4_private a_[4] = { simde_int16x4_to_private(val.val[0]), simde_int16x4_to_private(val.val[1]),
                                     simde_int16x4_to_private(val.val[2]), simde_int16x4_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint16m1x4_t dest = __riscv_vlseg4e16_v_i16m1x4(ptr, 4);
      dest = __riscv_vset_v_i16m1_i16m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_i16m1_i16m1x4 (dest, 1, a_[1].sv64);
@@ -192,7 +193,7 @@ simde_vst4_s32(int32_t *ptr, simde_int32x2x4_t val) {
   #else
     simde_int32x2_private a_[4] = { simde_int32x2_to_private(val.val[0]), simde_int32x2_to_private(val.val[1]),
                                     simde_int32x2_to_private(val.val[2]), simde_int32x2_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint32m1x4_t dest = __riscv_vlseg4e32_v_i32m1x4(ptr, 2);
      dest = __riscv_vset_v_i32m1_i32m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_i32m1_i32m1x4 (dest, 1, a_[1].sv64);
@@ -221,7 +222,7 @@ simde_vst4_s64(int64_t *ptr, simde_int64x1x4_t val) {
   #else
     simde_int64x1_private a_[4] = { simde_int64x1_to_private(val.val[0]), simde_int64x1_to_private(val.val[1]),
                                     simde_int64x1_to_private(val.val[2]), simde_int64x1_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint64m1x4_t dest = __riscv_vlseg4e64_v_i64m1x4(ptr, 1);
      dest = __riscv_vset_v_i64m1_i64m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_i64m1_i64m1x4 (dest, 1, a_[1].sv64);
@@ -271,7 +272,7 @@ simde_vst4_u8(uint8_t *ptr, simde_uint8x8x4_t val) {
   #else
     simde_uint8x8_private a_[4] = { simde_uint8x8_to_private(val.val[0]), simde_uint8x8_to_private(val.val[1]),
                                     simde_uint8x8_to_private(val.val[2]), simde_uint8x8_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint8m1x4_t dest = __riscv_vlseg4e8_v_u8m1x4(ptr, 8);
      dest = __riscv_vset_v_u8m1_u8m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u8m1_u8m1x4 (dest, 1, a_[1].sv64);
@@ -300,7 +301,7 @@ simde_vst4_u16(uint16_t *ptr, simde_uint16x4x4_t val) {
   #else
     simde_uint16x4_private a_[4] = { simde_uint16x4_to_private(val.val[0]), simde_uint16x4_to_private(val.val[1]),
                                      simde_uint16x4_to_private(val.val[2]), simde_uint16x4_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint16m1x4_t dest = __riscv_vlseg4e16_v_u16m1x4(ptr, 4);
      dest = __riscv_vset_v_u16m1_u16m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u16m1_u16m1x4 (dest, 1, a_[1].sv64);
@@ -329,7 +330,7 @@ simde_vst4_u32(uint32_t *ptr, simde_uint32x2x4_t val) {
   #else
     simde_uint32x2_private a_[4] = { simde_uint32x2_to_private(val.val[0]), simde_uint32x2_to_private(val.val[1]),
                                      simde_uint32x2_to_private(val.val[2]), simde_uint32x2_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint32m1x4_t dest = __riscv_vlseg4e32_v_u32m1x4(ptr, 2);
      dest = __riscv_vset_v_u32m1_u32m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u32m1_u32m1x4 (dest, 1, a_[1].sv64);
@@ -358,7 +359,7 @@ simde_vst4_u64(uint64_t *ptr, simde_uint64x1x4_t val) {
   #else
     simde_uint64x1_private a_[4] = { simde_uint64x1_to_private(val.val[0]), simde_uint64x1_to_private(val.val[1]),
                                      simde_uint64x1_to_private(val.val[2]), simde_uint64x1_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint64m1x4_t dest = __riscv_vlseg4e64_v_u64m1x4(ptr, 1);
      dest = __riscv_vset_v_u64m1_u64m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u64m1_u64m1x4 (dest, 1, a_[1].sv64);
@@ -387,7 +388,8 @@ simde_vst4q_f16(simde_float16_t *ptr, simde_float16x8x4_t val) {
   #else
     simde_float16x8_private a_[4] = { simde_float16x8_to_private(val.val[0]), simde_float16x8_to_private(val.val[1]),
                                       simde_float16x8_to_private(val.val[2]), simde_float16x8_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \
+        && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
      vfloat16m1x4_t dest = __riscv_vlseg4e16_v_f16m1x4((_Float16 *)ptr, 8);
      dest = __riscv_vset_v_f16m1_f16m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_f16m1_f16m1x4 (dest, 1, a_[1].sv128);
@@ -417,7 +419,7 @@ simde_vst4q_f32(simde_float32_t *ptr, simde_float32x4x4_t val) {
   #else
     simde_float32x4_private a_[4] = { simde_float32x4_to_private(val.val[0]), simde_float32x4_to_private(val.val[1]),
                                       simde_float32x4_to_private(val.val[2]), simde_float32x4_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vfloat32m1x4_t dest = __riscv_vlseg4e32_v_f32m1x4(ptr, 4);
      dest = __riscv_vset_v_f32m1_f32m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_f32m1_f32m1x4 (dest, 1, a_[1].sv128);
@@ -446,7 +448,7 @@ simde_vst4q_f64(simde_float64_t *ptr, simde_float64x2x4_t val) {
   #else
     simde_float64x2_private a_[4] = { simde_float64x2_to_private(val.val[0]), simde_float64x2_to_private(val.val[1]),
                                       simde_float64x2_to_private(val.val[2]), simde_float64x2_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vfloat64m1x4_t dest = __riscv_vlseg4e64_v_f64m1x4(ptr, 2);
      dest = __riscv_vset_v_f64m1_f64m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_f64m1_f64m1x4 (dest, 1, a_[1].sv128);
@@ -475,7 +477,7 @@ simde_vst4q_s8(int8_t *ptr, simde_int8x16x4_t val) {
   #else
     simde_int8x16_private a_[4] = { simde_int8x16_to_private(val.val[0]), simde_int8x16_to_private(val.val[1]),
                                     simde_int8x16_to_private(val.val[2]), simde_int8x16_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint8m1x4_t dest = __riscv_vlseg4e8_v_i8m1x4(ptr, 16);
      dest = __riscv_vset_v_i8m1_i8m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_i8m1_i8m1x4 (dest, 1, a_[1].sv128);
@@ -504,7 +506,7 @@ simde_vst4q_s16(int16_t *ptr, simde_int16x8x4_t val) {
   #else
     simde_int16x8_private a_[4] = { simde_int16x8_to_private(val.val[0]), simde_int16x8_to_private(val.val[1]),
                                     simde_int16x8_to_private(val.val[2]), simde_int16x8_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint16m1x4_t dest = __riscv_vlseg4e16_v_i16m1x4(ptr, 8);
      dest = __riscv_vset_v_i16m1_i16m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_i16m1_i16m1x4 (dest, 1, a_[1].sv128);
@@ -533,7 +535,7 @@ simde_vst4q_s32(int32_t *ptr, simde_int32x4x4_t val) {
   #else
     simde_int32x4_private a_[4] = { simde_int32x4_to_private(val.val[0]), simde_int32x4_to_private(val.val[1]),
                                     simde_int32x4_to_private(val.val[2]), simde_int32x4_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint32m1x4_t dest = __riscv_vlseg4e32_v_i32m1x4(ptr, 4);
      dest = __riscv_vset_v_i32m1_i32m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_i32m1_i32m1x4 (dest, 1, a_[1].sv128);
@@ -562,7 +564,7 @@ simde_vst4q_s64(int64_t *ptr, simde_int64x2x4_t val) {
   #else
     simde_int64x2_private a_[4] = { simde_int64x2_to_private(val.val[0]), simde_int64x2_to_private(val.val[1]),
                                     simde_int64x2_to_private(val.val[2]), simde_int64x2_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vint64m1x4_t dest = __riscv_vlseg4e64_v_i64m1x4(ptr, 2);
      dest = __riscv_vset_v_i64m1_i64m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_i64m1_i64m1x4 (dest, 1, a_[1].sv128);
@@ -592,7 +594,7 @@ simde_vst4q_u8(uint8_t *ptr, simde_uint8x16x4_t val) {
   #else
     simde_uint8x16_private a_[4] = { simde_uint8x16_to_private(val.val[0]), simde_uint8x16_to_private(val.val[1]),
                                      simde_uint8x16_to_private(val.val[2]), simde_uint8x16_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint8m1x4_t dest = __riscv_vlseg4e8_v_u8m1x4(ptr, 16);
      dest = __riscv_vset_v_u8m1_u8m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u8m1_u8m1x4 (dest, 1, a_[1].sv128);
@@ -621,7 +623,7 @@ simde_vst4q_u16(uint16_t *ptr, simde_uint16x8x4_t val) {
   #else
     simde_uint16x8_private a_[4] = { simde_uint16x8_to_private(val.val[0]), simde_uint16x8_to_private(val.val[1]),
                                      simde_uint16x8_to_private(val.val[2]), simde_uint16x8_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint16m1x4_t dest = __riscv_vlseg4e16_v_u16m1x4(ptr, 8);
      dest = __riscv_vset_v_u16m1_u16m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u16m1_u16m1x4 (dest, 1, a_[1].sv128);
@@ -650,7 +652,7 @@ simde_vst4q_u32(uint32_t *ptr, simde_uint32x4x4_t val) {
   #else
     simde_uint32x4_private a_[4] = { simde_uint32x4_to_private(val.val[0]), simde_uint32x4_to_private(val.val[1]),
                                      simde_uint32x4_to_private(val.val[2]), simde_uint32x4_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint32m1x4_t dest = __riscv_vlseg4e32_v_u32m1x4(ptr, 4);
      dest = __riscv_vset_v_u32m1_u32m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u32m1_u32m1x4 (dest, 1, a_[1].sv128);
@@ -679,7 +681,7 @@ simde_vst4q_u64(uint64_t *ptr, simde_uint64x2x4_t val) {
   #else
     simde_uint64x2_private a_[4] = { simde_uint64x2_to_private(val.val[0]), simde_uint64x2_to_private(val.val[1]),
                                      simde_uint64x2_to_private(val.val[2]), simde_uint64x2_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint64m1x4_t dest = __riscv_vlseg4e64_v_u64m1x4(ptr, 2);
      dest = __riscv_vset_v_u64m1_u64m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u64m1_u64m1x4 (dest, 1, a_[1].sv128);
@@ -708,7 +710,7 @@ simde_vst4_p8(simde_poly8_t *ptr, simde_poly8x8x4_t val) {
   #else
     simde_poly8x8_private a_[4] = { simde_poly8x8_to_private(val.val[0]), simde_poly8x8_to_private(val.val[1]),
                                     simde_poly8x8_to_private(val.val[2]), simde_poly8x8_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint8m1x4_t dest = __riscv_vlseg4e8_v_u8m1x4(ptr, 8);
      dest = __riscv_vset_v_u8m1_u8m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u8m1_u8m1x4 (dest, 1, a_[1].sv64);
@@ -737,7 +739,7 @@ simde_vst4_p16(simde_poly16_t *ptr, simde_poly16x4x4_t val) {
   #else
     simde_poly16x4_private a_[4] = { simde_poly16x4_to_private(val.val[0]), simde_poly16x4_to_private(val.val[1]),
                                      simde_poly16x4_to_private(val.val[2]), simde_poly16x4_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint16m1x4_t dest = __riscv_vlseg4e16_v_u16m1x4(ptr, 4);
      dest = __riscv_vset_v_u16m1_u16m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u16m1_u16m1x4 (dest, 1, a_[1].sv64);
@@ -766,7 +768,7 @@ simde_vst4_p64(simde_poly64_t *ptr, simde_poly64x1x4_t val) {
   #else
     simde_poly64x1_private a_[4] = { simde_poly64x1_to_private(val.val[0]), simde_poly64x1_to_private(val.val[1]),
                                      simde_poly64x1_to_private(val.val[2]), simde_poly64x1_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint64m1x4_t dest = __riscv_vlseg4e64_v_u64m1x4(ptr, 1);
      dest = __riscv_vset_v_u64m1_u64m1x4 (dest, 0, a_[0].sv64);
      dest = __riscv_vset_v_u64m1_u64m1x4 (dest, 1, a_[1].sv64);
@@ -795,7 +797,7 @@ simde_vst4q_p8(simde_poly8_t *ptr, simde_poly8x16x4_t val) {
   #else
     simde_poly8x16_private a_[4] = { simde_poly8x16_to_private(val.val[0]), simde_poly8x16_to_private(val.val[1]),
                                      simde_poly8x16_to_private(val.val[2]), simde_poly8x16_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint8m1x4_t dest = __riscv_vlseg4e8_v_u8m1x4(ptr, 16);
      dest = __riscv_vset_v_u8m1_u8m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u8m1_u8m1x4 (dest, 1, a_[1].sv128);
@@ -824,7 +826,7 @@ simde_vst4q_p16(simde_poly16_t *ptr, simde_poly16x8x4_t val) {
   #else
     simde_poly16x8_private a_[4] = { simde_poly16x8_to_private(val.val[0]), simde_poly16x8_to_private(val.val[1]),
                                      simde_poly16x8_to_private(val.val[2]), simde_poly16x8_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint16m1x4_t dest = __riscv_vlseg4e16_v_u16m1x4(ptr, 8);
      dest = __riscv_vset_v_u16m1_u16m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u16m1_u16m1x4 (dest, 1, a_[1].sv128);
@@ -853,7 +855,7 @@ simde_vst4q_p64(simde_poly64_t *ptr, simde_poly64x2x4_t val) {
   #else
     simde_poly64x2_private a_[4] = { simde_poly64x2_to_private(val.val[0]), simde_poly64x2_to_private(val.val[1]),
                                      simde_poly64x2_to_private(val.val[2]), simde_poly64x2_to_private(val.val[3]) };
-    #if defined(SIMDE_RISCV_V_NATIVE)
+    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      vuint64m1x4_t dest = __riscv_vlseg4e64_v_u64m1x4(ptr, 2);
      dest = __riscv_vset_v_u64m1_u64m1x4 (dest, 0, a_[0].sv128);
      dest = __riscv_vset_v_u64m1_u64m1x4 (dest, 1, a_[1].sv128);
diff --git a/simde/simde-arch.h b/simde/simde-arch.h
index d4adccc74..c9eaa8152 100644
--- a/simde/simde-arch.h
+++ b/simde/simde-arch.h
@@ -550,6 +550,9 @@
 #if defined(__riscv_zvfhmin)
 #  define SIMDE_ARCH_RISCV_ZVFHMIN 1
 #endif
+#if defined(__riscv_zvlsseg) || defined(__riscv_v)
+#  define SIMDE_ARCH_RISCV_ZVLSSEG 1
+#endif
 
 /* SPARC */
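
Note (not part of the patch): the simde-arch.h hunk above defines SIMDE_ARCH_RISCV_ZVLSSEG whenever the compiler advertises segment load/store support, either through the pre-ratification __riscv_zvlsseg macro or implicitly through the ratified V extension's __riscv_v, and every RVV segment load/store path in ld2.h, st3.h, and st4.h is now additionally gated on it. The sketch below illustrates the resulting guard pattern in isolation. It is a minimal sketch, not code from the tree: the helper name deinterleave3_u32 is hypothetical, the SIMDE_* macros are assumed to come from the simde headers as patched above, and n is assumed not to exceed VLMAX for LMUL=1 (the in-tree callers pass fixed element counts such as 1, 2, 4, 8, or 16 for the same reason).

    #include <stddef.h>
    #include <stdint.h>
    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      #include <riscv_vector.h>
    #endif

    /* Split the interleaved triples {src[3i], src[3i+1], src[3i+2]} into
     * three planar outputs r0, r1, r2, each of n elements. */
    static void
    deinterleave3_u32(const uint32_t *src, uint32_t *r0, uint32_t *r1,
                      uint32_t *r2, size_t n) {
    #if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
      /* One segment load fills a three-register tuple with the already
       * deinterleaved data; vget extracts each member for a plain store. */
      vuint32m1x3_t seg = __riscv_vlseg3e32_v_u32m1x3(src, n);
      __riscv_vse32_v_u32m1(r0, __riscv_vget_v_u32m1x3_u32m1(seg, 0), n);
      __riscv_vse32_v_u32m1(r1, __riscv_vget_v_u32m1x3_u32m1(seg, 1), n);
      __riscv_vse32_v_u32m1(r2, __riscv_vget_v_u32m1x3_u32m1(seg, 2), n);
    #else
      /* Portable fallback when the tuple-type segment intrinsics are
       * unavailable, mirroring the scalar paths the patched guards fall
       * back to. */
      for (size_t i = 0; i < n; i++) {
        r0[i] = src[3 * i];
        r1[i] = src[3 * i + 1];
        r2[i] = src[3 * i + 2];
      }
    #endif
    }

Guarding on both macros, rather than on SIMDE_RISCV_V_NATIVE alone, is what the patch changes: toolchains that enable RVV without the segment load/store intrinsics no longer select a code path they cannot compile, and simply take the next branch in the chain.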