From a3fd8d384f29e0febc4a348a27f2c68cde4582e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?= =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?= Date: Thu, 6 Feb 2020 08:31:17 -0800 Subject: [PATCH] Added cmpneq. --- include/bx/inline/simd128_langext.inl | 8 ++++++ include/bx/inline/simd128_neon.inl | 40 +++++++++++++-------------- include/bx/inline/simd128_ref.inl | 11 ++++++++ include/bx/inline/simd128_sse.inl | 6 ++++ include/bx/inline/simd_ni.inl | 9 ++++++ include/bx/simd_t.h | 3 ++ 6 files changed, 56 insertions(+), 21 deletions(-) diff --git a/include/bx/inline/simd128_langext.inl b/include/bx/inline/simd128_langext.inl index d48e55376..a0a66affd 100644 --- a/include/bx/inline/simd128_langext.inl +++ b/include/bx/inline/simd128_langext.inl @@ -346,6 +346,14 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf); return result; } + template<> + BX_SIMD_FORCE_INLINE simd128_langext_t simd_cmpneq(simd128_langext_t _a, simd128_langext_t _b) + { + simd128_langext_t result; + result.vi = _a.vf != _b.vf; + return result; + } + template<> BX_SIMD_FORCE_INLINE simd128_langext_t simd_cmplt(simd128_langext_t _a, simd128_langext_t _b) { diff --git a/include/bx/inline/simd128_neon.inl b/include/bx/inline/simd128_neon.inl index 5dfce0de5..dd1ebb0bd 100644 --- a/include/bx/inline/simd128_neon.inl +++ b/include/bx/inline/simd128_neon.inl @@ -9,23 +9,15 @@ namespace bx { -#if BX_COMPILER_CLANG -#define SHUFFLE_A(_a, _i0, _i1, _i2, _i3) \ -__builtin_shufflevector(_a, _a, _i0, _i1, _i2, _i3 ) -#define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) \ -__builtin_shufflevector(_a, _b, _i0, _i1, _i2, _i3 ) - +#if BX_COMPILER_CLANG +# define SHUFFLE_A(_a, _i0, _i1, _i2, _i3) __builtin_shufflevector(_a, _a, _i0, _i1, _i2, _i3 ) +# define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) __builtin_shufflevector(_a, _b, _i0, _i1, _i2, _i3 ) #else - -#define SHUFFLE_A(_a, _i0, _i1, _i2, _i3) \ -__builtin_shuffle(_a, (uint32x4_t){ _i0, _i1, _i2, _i3 }) -#define 
SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) \ -__builtin_shuffle(_a, _b, (uint32x4_t){ _i0, _i1, _i2, _i3 }) - +# define SHUFFLE_A(_a, _i0, _i1, _i2, _i3) __builtin_shuffle(_a, (uint32x4_t){ _i0, _i1, _i2, _i3 }) +# define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) __builtin_shuffle(_a, _b, (uint32x4_t){ _i0, _i1, _i2, _i3 }) #endif - #define ELEMx 0 #define ELEMy 1 #define ELEMz 2 @@ -291,11 +283,17 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww); return result; } + template<> + BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpneq(simd128_neon_t _a, simd128_neon_t _b) + { + return simd_cmpneq_ni(_a, _b); + } + template<> BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmplt(simd128_neon_t _a, simd128_neon_t _b) { - const uint32x4_t tmp = vcltq_f32(_a, _b); - const simd128_neon_t result = vreinterpretq_f32_u32(tmp); + const uint32x4_t tmp = vcltq_f32(_a, _b); + const simd128_neon_t result = vreinterpretq_f32_u32(tmp); return result; } @@ -303,8 +301,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww); template<> BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmple(simd128_neon_t _a, simd128_neon_t _b) { - const uint32x4_t tmp = vcleq_f32(_a, _b); - const simd128_neon_t result = vreinterpretq_f32_u32(tmp); + const uint32x4_t tmp = vcleq_f32(_a, _b); + const simd128_neon_t result = vreinterpretq_f32_u32(tmp); return result; } @@ -312,8 +310,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww); template<> BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpgt(simd128_neon_t _a, simd128_neon_t _b) { - const uint32x4_t tmp = vcgtq_f32(_a, _b); - const simd128_neon_t result = vreinterpretq_f32_u32(tmp); + const uint32x4_t tmp = vcgtq_f32(_a, _b); + const simd128_neon_t result = vreinterpretq_f32_u32(tmp); return result; } @@ -321,8 +319,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww); template<> BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpge(simd128_neon_t _a, simd128_neon_t _b) { - const uint32x4_t tmp = vcgeq_f32(_a, _b); - const simd128_neon_t result = vreinterpretq_f32_u32(tmp); + const uint32x4_t tmp = vcgeq_f32(_a, _b); + const 
simd128_neon_t result = vreinterpretq_f32_u32(tmp); return result; } diff --git a/include/bx/inline/simd128_ref.inl b/include/bx/inline/simd128_ref.inl index b99976e29..7b91af517 100644 --- a/include/bx/inline/simd128_ref.inl +++ b/include/bx/inline/simd128_ref.inl @@ -396,6 +396,17 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf); return result; } + template<> + BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmpneq(simd128_ref_t _a, simd128_ref_t _b) + { + simd128_ref_t result; + result.ixyzw[0] = _a.fxyzw[0] != _b.fxyzw[0] ? 0xffffffff : 0x0; + result.ixyzw[1] = _a.fxyzw[1] != _b.fxyzw[1] ? 0xffffffff : 0x0; + result.ixyzw[2] = _a.fxyzw[2] != _b.fxyzw[2] ? 0xffffffff : 0x0; + result.ixyzw[3] = _a.fxyzw[3] != _b.fxyzw[3] ? 0xffffffff : 0x0; + return result; + } + template<> BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmplt(simd128_ref_t _a, simd128_ref_t _b) { diff --git a/include/bx/inline/simd128_sse.inl b/include/bx/inline/simd128_sse.inl index 2fe09e33c..79f185bbc 100644 --- a/include/bx/inline/simd128_sse.inl +++ b/include/bx/inline/simd128_sse.inl @@ -308,6 +308,12 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf); return _mm_cmpeq_ps(_a, _b); } + template<> + BX_SIMD_FORCE_INLINE simd128_sse_t simd_cmpneq(simd128_sse_t _a, simd128_sse_t _b) + { + return _mm_cmpneq_ps(_a, _b); + } + template<> BX_SIMD_FORCE_INLINE simd128_sse_t simd_cmplt(simd128_sse_t _a, simd128_sse_t _b) { diff --git a/include/bx/inline/simd_ni.inl b/include/bx/inline/simd_ni.inl index 69d5110f4..499e54f26 100644 --- a/include/bx/inline/simd_ni.inl +++ b/include/bx/inline/simd_ni.inl @@ -124,6 +124,15 @@ namespace bx return result; } + template<typename Ty> + BX_SIMD_INLINE Ty simd_cmpneq_ni(Ty _a, Ty _b) + { + const Ty tmp0 = simd_cmpeq(_a, _b); + const Ty result = simd_not(tmp0); + + return result; + } + template<typename Ty> BX_SIMD_INLINE Ty simd_min_ni(Ty _a, Ty _b) { diff --git a/include/bx/simd_t.h b/include/bx/simd_t.h index 0b0756731..5e5fefa07 100644 --- a/include/bx/simd_t.h +++ b/include/bx/simd_t.h @@ -196,6 +196,9 @@
BX_SIMD128_IMPLEMENT_TEST(xyzw); template<typename Ty> Ty simd_cmpeq(Ty _a, Ty _b); + template<typename Ty> + Ty simd_cmpneq(Ty _a, Ty _b); + template<typename Ty> Ty simd_cmplt(Ty _a, Ty _b);