From 4e692ed3d0c8e680b53b3a61899ec6aa9779805d Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Fri, 1 Mar 2024 09:09:33 +0000 Subject: [PATCH] Add a README --- README.md | 18 ++++++++++++++++++ src/tblz.zig | 8 ++++---- 2 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..3748505 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# ZIMD + +Additional cross-platform SIMD support for Zig. + +Based loosely on [Google Highway](https://github.com/google/highway). + +## Why? + +Zig has built-in support for SIMD operations using `@Vector`. However, this only supports a few +basic operations. This library aims to fill in some of the blanks. + +## Operators + +### [TableLookupBytesOr0](https://google.github.io/highway/en/master/quick_reference.html#blockwise) + +Architectures: Scalar, X86_SSE3, Arm_Neon + +Similar to Zig's `@shuffle` operator, except that it doesn't require the shuffle mask to be comptime-known. 
diff --git a/src/tblz.zig b/src/tblz.zig index 2f61542..fb71226 100644 --- a/src/tblz.zig +++ b/src/tblz.zig @@ -21,9 +21,11 @@ const builtin = @import("builtin"); const std = @import("std"); const zimd = @import("zimd.zig"); -const TableLookupBytesOr0 = fn (bytes: @Vector(16, u8), indices: @Vector(16, i8)) callconv(.Inline) @Vector(16, u8); +pub const TableLookupBytesOr0 = fn (bytes: @Vector(16, u8), indices: @Vector(16, i8)) callconv(.Inline) @Vector(16, u8); -pub fn GetTableLookupBytesOr0(comptime cpu: std.Target.Cpu) TableLookupBytesOr0 { +pub const tableLookupBytesOr0 = GetTableLookupBytesOr0(builtin.cpu); + +fn GetTableLookupBytesOr0(comptime cpu: std.Target.Cpu) TableLookupBytesOr0 { if (comptime cpu.arch.isAARCH64() and std.Target.aarch64.featureSetHas(cpu.features, .neon)) { return Aarch64_Neon; } @@ -33,8 +35,6 @@ pub fn GetTableLookupBytesOr0(comptime cpu: std.Target.Cpu) TableLookupBytesOr0 return Scalar; } -pub const tableLookupBytesOr0 = GetTableLookupBytesOr0(builtin.cpu); - // For all vector widths; Arm anyway zeroes if >= 0x10. inline fn Aarch64_Neon(bytes: @Vector(16, u8), indices: @Vector(16, i8)) @Vector(16, u8) { return asm ("tbl.16b %[ret], { %[v0] }, %[v1]"