Skip to content

Commit

Permalink
Merge pull request #31 from guzba/ryan
Browse files Browse the repository at this point in the history
for cpp, wow much cast
  • Loading branch information
guzba authored Nov 25, 2024
2 parents 756463a + 8b5f131 commit e485e9f
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 21 deletions.
2 changes: 1 addition & 1 deletion nimsimd.nimble
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version = "1.3.0"
version = "1.3.1"
author = "Ryan Oldenburg"
description = "Pleasant Nim bindings for SIMD instruction sets."
license = "MIT"
Expand Down
26 changes: 19 additions & 7 deletions src/nimsimd/avx.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import sse42
export sse42

type
M256* {.importc: "__m256", header: "immintrin.h".} = object
M256i* {.importc: "__m256i", header: "immintrin.h".} = object
M256d* {.importc: "__m256d", header: "immintrin.h".} = object
M256* {.importc: "__m256", header: "immintrin.h", bycopy.} = object
M256i* {.importc: "__m256i", header: "immintrin.h", bycopy.} = object
M256d* {.importc: "__m256d", header: "immintrin.h", bycopy.} = object

{.push header: "immintrin.h".}

Expand Down Expand Up @@ -157,9 +157,15 @@ func mm256_load_si256*(p: pointer): M256i {.importc: "_mm256_load_si256".}

func mm256_loadu_pd*(p: pointer): M256d {.importc: "_mm256_loadu_pd".}

func mm256_loadu_ps*(p: pointer): M256 {.importc: "_mm256_loadu_ps".}
func mm256_loadu_ps(p: ptr float32): M256 {.importc: "_mm256_loadu_ps".}

func mm256_loadu_si256*(p: pointer): M256i {.importc: "_mm256_loadu_si256".}
template mm256_loadu_ps*(p: pointer): M256 =
  ## Exported overload taking an untyped `pointer`.
  ## Casts `p` to `ptr float32` and forwards to the non-exported
  ## `ptr float32` binding of `_mm256_loadu_ps`.
  mm256_loadu_ps(cast[ptr float32](p))

func mm256_loadu_si256(p: ptr M256i): M256i {.importc: "_mm256_loadu_si256".}

template mm256_loadu_si256*(p: pointer): M256i =
  ## Exported overload taking an untyped `pointer`.
  ## Casts `p` to `ptr M256i` and forwards to the non-exported
  ## `ptr M256i` binding of `_mm256_loadu_si256`.
  mm256_loadu_si256(cast[ptr M256i](p))

func mm256_loadu2_m128*(hi, lo: pointer): M256 {.importc: "_mm256_loadu2_m128".}

Expand Down Expand Up @@ -309,9 +315,15 @@ func mm256_store_si256*(p: pointer, a: M256i) {.importc: "_mm256_store_si256".}

func mm256_storeu_pd*(p: pointer, a: M256d) {.importc: "_mm256_storeu_pd".}

func mm256_storeu_ps*(p: pointer, a: M256) {.importc: "_mm256_storeu_ps".}
func mm256_storeu_ps(p: ptr float32, a: M256) {.importc: "_mm256_storeu_ps".}

template mm256_storeu_ps*(p: pointer, a: M256) =
  ## Exported overload taking an untyped `pointer` destination.
  ## Casts `p` to `ptr float32` and forwards `a` to the non-exported
  ## `ptr float32` binding of `_mm256_storeu_ps`.
  mm256_storeu_ps(cast[ptr float32](p), a)

func mm256_storeu_si256(p: ptr M256i, a: M256i) {.importc: "_mm256_storeu_si256".}

func mm256_storeu_si256*(p: pointer, a: M256i) {.importc: "_mm256_storeu_si256".}
template mm256_storeu_si256*(p: pointer, a: M256i) =
  ## Exported overload taking an untyped `pointer` destination.
  ## Casts `p` to `ptr M256i` and forwards `a` to the non-exported
  ## `ptr M256i` binding of `_mm256_storeu_si256`.
  mm256_storeu_si256(cast[ptr M256i](p), a)

# `_mm256_storeu2_m128(hiaddr, loaddr, a)` takes the vector to store as its
# third argument; without `a` every call generates C with too few arguments.
func mm256_storeu2_m128*(hi, lo: pointer, a: M256) {.importc: "_mm256_storeu2_m128".}

Expand Down
10 changes: 8 additions & 2 deletions src/nimsimd/avx2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,17 @@ func mm256_madd_epi16*(a, b: M256i): M256i {.importc: "_mm256_madd_epi16".}

func mm256_maddubs_epi16*(a, b: M256i): M256i {.importc: "_mm256_maddubs_epi16".}

func mm256_maskload_epi32*(p: pointer, mask: M256i): M256i {.importc: "_mm256_maskload_epi32".}
func mm256_maskload_epi32(p: ptr int32, mask: M256i): M256i {.importc: "_mm256_maskload_epi32".}

template mm256_maskload_epi32*(p: pointer, mask: M256i): M256i =
  ## Exported overload taking an untyped `pointer` source.
  ## Casts `p` to `ptr int32` and forwards, together with `mask`, to the
  ## non-exported `ptr int32` binding of `_mm256_maskload_epi32`.
  mm256_maskload_epi32(cast[ptr int32](p), mask)

func mm256_maskload_epi64*(p: pointer, mask: M256i): M256i {.importc: "_mm256_maskload_epi64".}

func mm256_maskstore_epi32*(p: pointer, mask, a: M256i) {.importc: "_mm256_maskstore_epi32".}
func mm256_maskstore_epi32(p: ptr int32, mask, a: M256i) {.importc: "_mm256_maskstore_epi32".}

template mm256_maskstore_epi32*(p: pointer, mask, a: M256i) =
  ## Exported overload taking an untyped `pointer` destination.
  ## Casts `p` to `ptr int32` and forwards `mask` and `a` to the
  ## non-exported `ptr int32` binding of `_mm256_maskstore_epi32`.
  mm256_maskstore_epi32(cast[ptr int32](p), mask, a)

func mm256_maskstore_epi64*(p: pointer, mask, a: M256i) {.importc: "_mm256_maskstore_epi64".}

Expand Down
6 changes: 3 additions & 3 deletions src/nimsimd/runtimecheck.nim
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ when defined(amd64):
PCLMULQDQ
SHA
AES
CMPXCHG16B # Atomic CompareExchange 16-byte, avail. since Haswell
CMPXCHG16B
F16C
BM1
BM2
BMI1
BMI2

InstructionSetCheckInfo = object
leaf, register, bit: int
Expand Down
31 changes: 23 additions & 8 deletions src/nimsimd/sse2.nim
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
## SSE and SSE2 intrinsics

type
M128* {.importc: "__m128", header: "xmmintrin.h".} = object
M128i* {.importc: "__m128i", header: "emmintrin.h".} = object
M128d* {.importc: "__m128d", header: "emmintrin.h".} = object
M128* {.importc: "__m128", header: "xmmintrin.h", bycopy.} = object
M128i* {.importc: "__m128i", header: "emmintrin.h", bycopy.} = object
M128d* {.importc: "__m128d", header: "emmintrin.h", bycopy.} = object

template MM_SHUFFLE*(z, y, x, w: int | uint): int32 =
  ## Packs four selectors into one `int32` immediate:
  ## `z` is shifted to bits 6 and up, `y` to bits 4 and up, `x` to bits 2
  ## and up, and `w` stays in the low bits; the parts are OR-ed together.
  ## The arguments are not masked, so each is expected to be in `0..3`;
  ## larger values spill into the neighbouring field.
  ((z shl 6) or (y shl 4) or (x shl 2) or w).int32
Expand Down Expand Up @@ -191,7 +191,10 @@ func mm_store1_ps*(p: pointer, a: M128) {.importc: "_mm_store1_ps".}

func mm_storer_ps*(p: pointer, a: M128) {.importc: "_mm_storer_ps".}

func mm_storeu_ps*(p: pointer, a: M128) {.importc: "_mm_storeu_ps".}
func mm_storeu_ps(p: ptr float32, a: M128) {.importc: "_mm_storeu_ps".}

template mm_storeu_ps*(p: pointer, a: M128) =
  ## Exported overload taking an untyped `pointer` destination.
  ## Casts `p` to `ptr float32` and forwards `a` to the non-exported
  ## `ptr float32` binding of `_mm_storeu_ps`.
  mm_storeu_ps(cast[ptr float32](p), a)

# `_mm_storeu_si16` stores from an integer vector (`__m128i`), not `__m128`,
# so the value parameter must be `M128i`.
func mm_storeu_si16*(p: pointer, a: M128i) {.importc: "_mm_storeu_si16".}

Expand Down Expand Up @@ -419,13 +422,19 @@ func mm_load_pd1*(p: pointer): M128d {.importc: "_mm_load_pd1".}

func mm_load_sd*(p: pointer): M128d {.importc: "_mm_load_sd".}

func mm_load_si128*(p: pointer): M128i {.importc: "_mm_load_si128".}
func mm_load_si128(p: ptr M128i): M128i {.importc: "_mm_load_si128".}

template mm_load_si128*(p: pointer): M128i =
  ## Exported overload taking an untyped `pointer` source.
  ## Casts `p` to `ptr M128i` and forwards to the non-exported
  ## `ptr M128i` binding of `_mm_load_si128`.
  mm_load_si128(cast[ptr M128i](p))

func mm_load1_pd*(p: pointer): M128d {.importc: "_mm_load1_pd".}

func mm_loadh_pd*(a: M128d, p: pointer): M128d {.importc: "_mm_loadh_pd".}

func mm_loadl_epi64*(p: pointer): M128i {.importc: "_mm_loadl_epi64".}
func mm_loadl_epi64(p: ptr M128i): M128i {.importc: "_mm_loadl_epi64".}

template mm_loadl_epi64*(p: pointer): M128i =
  ## Exported overload taking an untyped `pointer` source.
  ## Casts `p` to `ptr M128i` and forwards to the non-exported
  ## `ptr M128i` binding of `_mm_loadl_epi64`.
  mm_loadl_epi64(cast[ptr M128i](p))

# `_mm_loadl_pd` returns a double-precision vector (`__m128d`), matching the
# type of `a` and the sibling `mm_loadh_pd` binding, so the result is `M128d`.
func mm_loadl_pd*(a: M128d, p: pointer): M128d {.importc: "_mm_loadl_pd".}

Expand All @@ -435,7 +444,10 @@ func mm_loadr_pd*(p: pointer): M128d {.importc: "_mm_loadr_pd".}

func mm_loadu_pd*(p: pointer): M128d {.importc: "_mm_loadu_pd".}

func mm_loadu_si128*(p: pointer): M128i {.importc: "_mm_loadu_si128".}
func mm_loadu_si128(p: ptr M128i): M128i {.importc: "_mm_loadu_si128".}

template mm_loadu_si128*(p: pointer): M128i =
  ## Exported overload taking an untyped `pointer` source.
  ## Casts `p` to `ptr M128i` and forwards to the non-exported
  ## `ptr M128i` binding of `_mm_loadu_si128`.
  mm_loadu_si128(cast[ptr M128i](p))

func mm_loadu_si32*(p: pointer): M128i {.importc: "_mm_loadu_si32".}

Expand Down Expand Up @@ -595,7 +607,10 @@ func mm_storer_pd*(p: pointer, a: M128d) {.importc: "_mm_storer_pd".}

func mm_storeu_pd*(p: pointer, a: M128d) {.importc: "_mm_storeu_pd".}

func mm_storeu_si128*(p: pointer, a: M128i) {.importc: "_mm_storeu_si128".}
func mm_storeu_si128(p: ptr M128i, a: M128i) {.importc: "_mm_storeu_si128".}

template mm_storeu_si128*(p: pointer, a: M128i) =
  ## Exported overload taking an untyped `pointer` destination.
  ## Casts `p` to `ptr M128i` and forwards `a` to the non-exported
  ## `ptr M128i` binding of `_mm_storeu_si128`.
  mm_storeu_si128(cast[ptr M128i](p), a)

func mm_storeu_si32*(p: pointer, a: M128i) {.importc: "_mm_storeu_si32".}

Expand Down

0 comments on commit e485e9f

Please sign in to comment.