Skip to content

Commit

Permalink
Added cmpneq.
Browse files Browse the repository at this point in the history
  • Loading branch information
bkaradzic committed Feb 6, 2020
1 parent f48bd19 commit a3fd8d3
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 21 deletions.
8 changes: 8 additions & 0 deletions include/bx/inline/simd128_langext.inl
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,14 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
return result;
}

template<>
BX_SIMD_FORCE_INLINE simd128_langext_t simd_cmpneq(simd128_langext_t _a, simd128_langext_t _b)
{
	// Compare the float views of both operands with the vector `!=` operator
	// and store the outcome through the integer view of the returned value.
	simd128_langext_t mask;
	mask.vi = (_a.vf != _b.vf);
	return mask;
}

template<>
BX_SIMD_FORCE_INLINE simd128_langext_t simd_cmplt(simd128_langext_t _a, simd128_langext_t _b)
{
Expand Down
40 changes: 19 additions & 21 deletions include/bx/inline/simd128_neon.inl
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,15 @@

namespace bx
{
// Lane-shuffle helpers used by the swizzle/shuffle implementations below.
//
// SHUFFLE_A  picks four lanes out of a single vector `_a`.
// SHUFFLE_AB picks four lanes out of the pair (`_a`, `_b`).
//
// Clang exposes this as __builtin_shufflevector, which takes the lane indices
// as separate arguments; other compilers use __builtin_shuffle, which takes
// the indices packed into a uint32x4_t mask vector.
//
// Each macro is defined exactly once per branch so the conditional stays
// balanced (one #if / #else / #endif).
#if BX_COMPILER_CLANG
#	define SHUFFLE_A(_a, _i0, _i1, _i2, _i3)      __builtin_shufflevector(_a, _a, _i0, _i1, _i2, _i3 )
#	define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) __builtin_shufflevector(_a, _b, _i0, _i1, _i2, _i3 )
#else
#	define SHUFFLE_A(_a, _i0, _i1, _i2, _i3)      __builtin_shuffle(_a, (uint32x4_t){ _i0, _i1, _i2, _i3 })
#	define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) __builtin_shuffle(_a, _b, (uint32x4_t){ _i0, _i1, _i2, _i3 })
#endif


#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
Expand Down Expand Up @@ -291,38 +283,44 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
return result;
}

template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpneq(simd128_neon_t _a, simd128_neon_t _b)
{
	// Delegates to the generic simd_cmpneq_ni helper for the NEON vector type.
	const simd128_neon_t mask = simd_cmpneq_ni(_a, _b);
	return mask;
}

/// Per-lane `_a < _b` comparison for the NEON backend.
///
/// @param _a Left-hand operand.
/// @param _b Right-hand operand.
/// @returns The comparison mask produced by vcltq_f32, carried in the float
///   vector type (bits are reinterpreted, not converted).
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmplt(simd128_neon_t _a, simd128_neon_t _b)
{
	// The intrinsic returns an unsigned-integer lane mask; each local is
	// declared once, then the mask bits are reinterpreted as float lanes.
	const uint32x4_t     tmp    = vcltq_f32(_a, _b);
	const simd128_neon_t result = vreinterpretq_f32_u32(tmp);

	return result;
}

/// Per-lane `_a <= _b` comparison for the NEON backend.
///
/// @param _a Left-hand operand.
/// @param _b Right-hand operand.
/// @returns The comparison mask produced by vcleq_f32, carried in the float
///   vector type (bits are reinterpreted, not converted).
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmple(simd128_neon_t _a, simd128_neon_t _b)
{
	// The intrinsic returns an unsigned-integer lane mask; each local is
	// declared once, then the mask bits are reinterpreted as float lanes.
	const uint32x4_t     tmp    = vcleq_f32(_a, _b);
	const simd128_neon_t result = vreinterpretq_f32_u32(tmp);

	return result;
}

/// Per-lane `_a > _b` comparison for the NEON backend.
///
/// @param _a Left-hand operand.
/// @param _b Right-hand operand.
/// @returns The comparison mask produced by vcgtq_f32, carried in the float
///   vector type (bits are reinterpreted, not converted).
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpgt(simd128_neon_t _a, simd128_neon_t _b)
{
	// The intrinsic returns an unsigned-integer lane mask; each local is
	// declared once, then the mask bits are reinterpreted as float lanes.
	const uint32x4_t     tmp    = vcgtq_f32(_a, _b);
	const simd128_neon_t result = vreinterpretq_f32_u32(tmp);

	return result;
}

/// Per-lane `_a >= _b` comparison for the NEON backend.
///
/// @param _a Left-hand operand.
/// @param _b Right-hand operand.
/// @returns The comparison mask produced by vcgeq_f32, carried in the float
///   vector type (bits are reinterpreted, not converted).
template<>
BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpge(simd128_neon_t _a, simd128_neon_t _b)
{
	// The intrinsic returns an unsigned-integer lane mask; each local is
	// declared once, then the mask bits are reinterpreted as float lanes.
	const uint32x4_t     tmp    = vcgeq_f32(_a, _b);
	const simd128_neon_t result = vreinterpretq_f32_u32(tmp);

	return result;
}
Expand Down
11 changes: 11 additions & 0 deletions include/bx/inline/simd128_ref.inl
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,17 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
return result;
}

template<>
BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmpneq(simd128_ref_t _a, simd128_ref_t _b)
{
	// Reference (scalar) implementation: each of the four lanes becomes
	// 0xffffffff when the float lanes differ and 0x0 when they are equal.
	simd128_ref_t mask;
	for (int lane = 0; lane < 4; ++lane)
	{
		mask.ixyzw[lane] = _a.fxyzw[lane] != _b.fxyzw[lane] ? 0xffffffff : 0x0;
	}

	return mask;
}

template<>
BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmplt(simd128_ref_t _a, simd128_ref_t _b)
{
Expand Down
6 changes: 6 additions & 0 deletions include/bx/inline/simd128_sse.inl
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,12 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
return _mm_cmpeq_ps(_a, _b);
}

template<>
BX_SIMD_FORCE_INLINE simd128_sse_t simd_cmpneq(simd128_sse_t _a, simd128_sse_t _b)
{
	// Thin wrapper over the SSE packed single-precision "not equal" compare.
	const simd128_sse_t mask = _mm_cmpneq_ps(_a, _b);
	return mask;
}

template<>
BX_SIMD_FORCE_INLINE simd128_sse_t simd_cmplt(simd128_sse_t _a, simd128_sse_t _b)
{
Expand Down
9 changes: 9 additions & 0 deletions include/bx/inline/simd_ni.inl
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,15 @@ namespace bx
return result;
}

template<typename Ty>
BX_SIMD_INLINE Ty simd_cmpneq_ni(Ty _a, Ty _b)
{
	// Generic fallback: "not equal" is the simd_not of the simd_cmpeq result.
	return simd_not(simd_cmpeq(_a, _b) );
}

template<typename Ty>
BX_SIMD_INLINE Ty simd_min_ni(Ty _a, Ty _b)
{
Expand Down
3 changes: 3 additions & 0 deletions include/bx/simd_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw);
/// Equality comparison of two SIMD values; specialized per backend type `Ty`.
///
/// NOTE(review): the backend definitions are not fully visible here;
/// presumably the result is a per-lane mask in the same form as
/// simd_cmpneq - confirm against the implementations.
template<typename Ty>
Ty simd_cmpeq(Ty _a, Ty _b);

/// Per-lane "not equal" comparison of two SIMD values; specialized per
/// backend type `Ty`.
///
/// @param _a Left-hand operand.
/// @param _b Right-hand operand.
/// @returns A value of type `Ty` holding the comparison result per lane. In
///   the scalar reference backend a lane is 0xffffffff when the float lanes
///   differ and 0x0 when they are equal.
template<typename Ty>
Ty simd_cmpneq(Ty _a, Ty _b);

/// Per-lane "less than" comparison (`_a < _b`) of two SIMD values;
/// specialized per backend type `Ty`.
///
/// NOTE(review): presumably returns a per-lane mask in the same form as
/// simd_cmpneq - confirm against the backend implementations.
template<typename Ty>
Ty simd_cmplt(Ty _a, Ty _b);

Expand Down

0 comments on commit a3fd8d3

Please sign in to comment.