Skip to content

Commit

Permalink
Merge pull request #32 from guzba/ryan
Browse files Browse the repository at this point in the history
1.3.2
  • Loading branch information
guzba authored Dec 22, 2024
2 parents e485e9f + ddba087 commit 3f6b266
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 8 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
nim-version: ['1.2.x', '1.4.x', '1.6.x', 'stable']
nim-version: ['1.2.x', '1.4.x', '1.6.x', '2.0.x', 'stable']
include:
- nim-version: '1.4.x'
gc_orc: true
Expand All @@ -17,7 +17,7 @@ jobs:

steps:
- uses: actions/checkout@v4
- uses: jiro4989/setup-nim-action@v1
- uses: jiro4989/setup-nim-action@v2
with:
nim-version: ${{ matrix.nim-version }}
repo-token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion nimsimd.nimble
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version = "1.3.1"
version = "1.3.2"
author = "Ryan Oldenburg"
description = "Pleasant Nim bindings for SIMD instruction sets."
license = "MIT"
Expand Down
21 changes: 19 additions & 2 deletions src/nimsimd/neon.nim
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ func vld1q_lane_u16*(p: pointer, v: uint16x8, lane: int32): uint16x8
func vld1q_lane_u32*(p: pointer, v: uint32x4, lane: int32): uint32x4
func vld1q_lane_u64*(p: pointer, v: uint64x2, lane: int32): uint64x2

func vld1_lane_u32*(p: pointer, v: uint32x2, lane: int32): uint32x2
func vld1_lane_u16*(p: pointer, v: uint16x4, lane: int32): uint16x4
func vld1_lane_u8*(p: pointer, v: uint8x8, lane: int32): uint8x8

func vld1q_dup_f64*(p: pointer): float64x2

func vst1q_s32*(p: pointer, v: int32x4)
Expand All @@ -201,9 +205,12 @@ func vst1_u8*(p: pointer, v: uint8x8)
func vst1_u16*(p: pointer, v: uint16x4)
func vst1_u32*(p: pointer, v: uint32x2)
func vst1_u64*(p: pointer, v: uint64x1)

func vst1_f32*(p: pointer, v: float32x2)

func vst1_lane_u32*(p: pointer, v: uint32x2, lane: int32)
func vst1_lane_u16*(p: pointer, v: uint16x4, lane: int32)
func vst1_lane_u8*(p: pointer, v: uint8x8, lane: int32)

func vst2_u8*(p: pointer, v: uint8x8x2)
func vst2_u16*(p: pointer, v: uint16x4x2)
func vst2_u32*(p: pointer, v: uint32x2x2)
Expand Down Expand Up @@ -396,6 +403,9 @@ func vextq_f64*(a, b: float64x2, n: int): float64x2

func vminvq_u32*(a: uint32x4): uint32
func vminvq_f32*(a: float32x4): float32
func vminvq_u8*(a: uint8x16): uint8

func vminv_u8*(a: uint8x8): uint8

func vmaxq_u8*(a, b: uint8x16): uint8x16

Expand Down Expand Up @@ -451,6 +461,10 @@ func vreinterpretq_s32_u8*(a: uint8x16): int32x4
func vreinterpretq_s32_u32*(a: uint32x4): int32x4
func vreinterpretq_u16_u8*(a: uint8x16): uint16x8

func vreinterpret_u32_u8*(a: uint8x8): uint32x2
func vreinterpret_u8_u16*(a: uint16x4): uint8x8
func vreinterpret_u16_u8*(a: uint8x8): uint16x4
func vreinterpret_u8_u32*(a: uint32x2): uint8x8
func vreinterpret_u64_u8*(a: uint8x8): uint64x1
func vreinterpret_s32_u32*(a: uint32x2): int32x2
func vreinterpret_f32_u64*(a: uint64x1): float32x2
Expand All @@ -464,4 +478,7 @@ func uint32x4_immediate*(v3, v2, v1, v0: static uint32): uint32x4 {.inline.} =
{.emit: [result, " = (uint32x4_t){", v0, ",", v1, ",", v2, ",", v3, "};"].}

# Builds a `uint8x16` from sixteen compile-time `uint8` values by emitting a C
# compound literal and assigning it to `result`.
# Parameters are declared from `v15` down to `v0`, while the literal is filled
# starting with `v0`, so `v0` becomes the first initializer.
# The literal must be typed `uint8x16_t`: it carries sixteen initializers and is
# assigned to a `uint8x16` result, so an 8-lane literal type would not match.
func uint8x16_immediate*(v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0: static uint8): uint8x16 {.inline.} =
{.emit: [result, " = (uint8x16_t){", v0, ",", v1, ",", v2, ",", v3, ",", v4, ",", v5, ",", v6, ",", v7, ",", v8, ",", v9, ",", v10, ",", v11, ",", v12, ",", v13, ",", v14, ",", v15, "};"].}
{.emit: [result, " = (uint8x16_t){", v0, ",", v1, ",", v2, ",", v3, ",", v4, ",", v5, ",", v6, ",", v7, ",", v8, ",", v9, ",", v10, ",", v11, ",", v12, ",", v13, ",", v14, ",", v15, "};"].}

# Builds a `uint8x8` from eight compile-time `uint8` values by emitting a C
# compound literal of type `uint8x8_t` and assigning it to `result`.
# Parameters are declared from `v7` down to `v0`, while the literal is filled
# starting with `v0`, so `v0` becomes the first initializer.
func uint8x8_immediate*(v7, v6, v5, v4, v3, v2, v1, v0: static uint8): uint8x8 {.inline.} =
{.emit: [result, " = (uint8x8_t){", v0, ",", v1, ",", v2, ",", v3, ",", v4, ",", v5, ",", v6, ",", v7, "};"].}
6 changes: 3 additions & 3 deletions src/nimsimd/sse2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ func mm_set_epi32*(a, b, c, d: int32 | uint32): M128i {.importc: "_mm_set_epi32"

func mm_set_epi64x*(a, b: int64 | uint64): M128i {.importc: "_mm_set_epi64x".}

func mm_set_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8): M128i {.importc: "_mm_set_epi8".}
func mm_set_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8 | char): M128i {.importc: "_mm_set_epi8".}

func mm_set_pd*(a, b: float64): M128d {.importc: "_mm_set_pd".}

Expand All @@ -523,15 +523,15 @@ func mm_set1_epi32*(a: int32 | uint32): M128i {.importc: "_mm_set1_epi32".}

func mm_set1_epi64x*(a: int64 | uint64): M128i {.importc: "_mm_set1_epi64x".}

func mm_set1_epi8*(a: int8 | uint8): M128i {.importc: "_mm_set1_epi8".}
func mm_set1_epi8*(a: int8 | uint8 | char): M128i {.importc: "_mm_set1_epi8".}

func mm_set1_pd*(a: float64): M128d {.importc: "_mm_set1_pd".}

func mm_setr_epi16*(a, b, c, d, e, f, g, h: int16): M128i {.importc: "_mm_setr_epi16".}

func mm_setr_epi32*(a, b, c, d: int32 | uint32): M128i {.importc: "_mm_setr_epi32".}

func mm_setr_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: uint8): M128i {.importc: "_mm_setr_epi8".}
func mm_setr_epi8*(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p: int8 | uint8 | char): M128i {.importc: "_mm_setr_epi8".}

func mm_setr_pd*(a, b: float64): M128d {.importc: "_mm_setr_pd".}

Expand Down

0 comments on commit 3f6b266

Please sign in to comment.