From 2082c53240b539ca48278b4458d86b94deff2900 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 1 Aug 2023 16:16:11 -0700
Subject: [PATCH] Add riscv_bitmanip.h. Add Zbb intrinsics. Split crypto and
 bitmanip.

---
 riscv-c-api.md | 138 +++++++++++++++++++++++++++++--------------------
 1 file changed, 82 insertions(+), 56 deletions(-)

diff --git a/riscv-c-api.md b/riscv-c-api.md
index 99d5e40..b0da499 100644
--- a/riscv-c-api.md
+++ b/riscv-c-api.md
@@ -230,18 +230,17 @@ RISC-V intrinsics examples:
 vint8m1_t __riscv_vadd_vv_i8m1(vint8m1_t vs2, vint8m1_t vs1, size_t vl); // vadd.vv vd, vs2, vs1
 ```
 
-### Scalar Cryptography Extension Intrinsics
+### Scalar Bit Manipulation Extension Intrinsics
 
-In order to access the RISC-V scalar crypto intrinsics, it is necessary to
-include the header file `riscv_crypto.h`.
+In order to access the RISC-V scalar bit manipulation intrinsics, it is
+necessary to include the header file `riscv_bitmanip.h`.
 
 The functions are only only available if the compiler's `-march` string
 enables the required ISA extension. (Calling functions for not enabled
 ISA extensions will lead to compile-time and/or link-time errors.)
 
 Intrinsics operating on XLEN sized value are not available as there is no type
-defined. If `xlen_t` is added in the future, this can be revisited. It is
-assumed that cryptographic code will be written using fixed bit widths.
+defined. If `xlen_t` is added in the future, this can be revisited.
 
 Unsigned types are used as that is the most logical representation for a
 collection of bits.
@@ -255,57 +254,84 @@ promotion rules and how to handle signed types.
 
 Sign extension of 32-bit values on RV64 is not reflected in the interface.
 
-| Prototype | Instruction | Extension | Notes |
-| --------- | ----------- | --------- | ----- |
-| `uint32_t __riscv_ror_32(uint32_t x, uint32_t shamt);` | `ror[i][w]` | Zbb, Zbkb | |
-| `uint64_t __riscv_ror_64(uint64_t x, uint32_t shamt);` | `ror[i]` | Zbb, Zbkb (RV64) | |
-| `uint32_t __riscv_rol_32(uint32_t x, uint32_t shamt);` | `rol[w]`/`ror[i][w]` | Zbb, Zbkb | |
-| `uint64_t __riscv_rol_64(uint64_t x, uint32_t shamt);` | `rol`/`rori` | Zbb, Zbkb (RV64) | |
-| `uint32_t __riscv_rev8_32(uint32_t x);` | `rev8` | Zbb, Zbkb | Emulated with `rev8`+`srai` on RV64 |
-| `uint64_t __riscv_rev8_64(uint64_t x);` | `rev8` | Zbb, Zbkb (RV64) | |
-| `uint32_t __riscv_brev8_32(uint32_t x);` | `brev8` | Zbkb | Emulated with `clmul`+`sext.w` on RV64 |
-| `uint64_t __riscv_brev8_64(uint64_t x);` | `brev8` | Zbkb (RV64) | |
-| `uint32_t __riscv_zip_32(uint32_t x);` | `zip` | Zbkb (RV32) | No emulation for RV64 |
-| `uint32_t __riscv_unzip_32(uint32_t x);` | `unzip` | Zbkb (RV32) | No emulation for RV64 |
-| `uint32_t __riscv_clmul_32(uint32_t x);` | `clmul` | Zbc, Zbkc | Emulated with `clmul`+`sext.w` on RV64 |
-| `uint64_t __riscv_clmul_64(uint64_t x);` | `clmul` | Zbc, Zbkc (RV64) | |
-| `uint32_t __riscv_clmulh_32(uint32_t x);` | `clmulh` | Zbc, Zbkc (RV32) | Emulation on RV64 requires 4-6 instructions |
-| `uint64_t __riscv_clmulh_64(uint64_t x);` | `clmulh` | Zbc, Zbkc (RV64) | |
-| `uint32_t __riscv_xperm4_32(uint32_t rs1, uint32_t rs2);` | `xperm4` | Zbkx (RV32) | No emulation for RV64 |
-| `uint64_t __riscv_xperm4_64(uint64_t rs1, uint64_t rs2);` | `xperm4` | Zbkx (RV64) | |
-| `uint32_t __riscv_xperm8_32(uint32_t rs1, uint32_t rs2);` | `xperm8` | Zbkx (RV32) | No emulation for RV64 |
-| `uint64_t __riscv_xperm8_64(uint64_t rs1, uint64_t rs2);` | `xperm8` | Zbkx (RV64) | |
-| `uint32_t __riscv_aes32dsi(uint32_t rs1, uint32_t rs2, const int bs);` | `aes32dsi` | Zknd (RV32) | `bs`=[0..3] |
-| `uint32_t __riscv_aes32dsmi(uint32_t rs1, uint32_t rs2, const int bs);` | `aes32dsmi` | Zknd (RV32) | `bs`=[0..3] |
-| `uint64_t __riscv_aes64ds(uint64 rs1, uint64_t rs2);` | `aes64ds` | Zknd (RV64) | |
-| `uint64_t __riscv_aes64dsm(uint64 rs1, uint64_t rs2);` | `aes64dsm` | Zknd (RV64) | |
-| `uint64_t __riscv_aes64im(uint64 rs1);` | `aes64im` | Zknd (RV64) | `rnum`=[0..10] |
-| `uint64_t __riscv_aes64ks1i(uint64 rs1, const int rnum);` | `aes64ks1i` | Zknd, Zkne (RV64) | `rnum`=[0..10] |
-| `uint64_t __riscv_aes64ks2(uint64 rs1, uint64_t rs2);` | `aes64ks2` | Zknd, Zkne (RV64) | |
-| `uint32_t __riscv_aes32esi(uint32_t rs1, uint32_t rs2, const int bs);` | `aes32esi` | Zkne (RV32) | `bs`=[0..3] |
-| `uint32_t __riscv_aes32esmi(uint32_t rs1, uint32_t rs2, const int bs);` | `aes32esmi` | Zkne (RV32) | `bs`=[0..3] |
-| `uint64_t __riscv_aes64es(uint64 rs1, uint64_t rs2);` | `aes32es` | Zkne (RV64) | |
-| `uint64_t __riscv_aes64esm(uint64 rs1, uint64_t rs2);` | `aes32esm` | Zkne (RV64) | |
-| `uint32_t __riscv_sha256sig0(uint32_t rs1);` | `sha256sig0` | Zknh | |
-| `uint32_t __riscv_sha256sig1(uint32_t rs1);` | `sha256sig1` | Zknh | |
-| `uint32_t __riscv_sha256sum0(uint32_t rs1);` | `sha256sum0` | Zknh | |
-| `uint32_t __riscv_sha256sum1(uint32_t rs1);` | `sha256sum1` | Zknh | |
-| `uint32_t __riscv_sha512sig0h(uint32_t rs1, uint32_t rs2);` | `sha512sig0h` | Zknh (RV32) | |
-| `uint32_t __riscv_sha512sig0l(uint32_t rs1, uint32_t rs2);` | `sha512sig0l` | Zknh (RV32) | |
-| `uint32_t __riscv_sha512sig1h(uint32_t rs1, uint32_t rs2);` | `sha512sig1h` | Zknh (RV32) | |
-| `uint32_t __riscv_sha512sig1l(uint32_t rs1, uint32_t rs2);` | `sha512sig1l` | Zknh (RV32) | |
-| `uint32_t __riscv_sha512sum0h(uint32_t rs1, uint32_t rs2);` | `sha512sum0h` | Zknh (RV32) | |
-| `uint32_t __riscv_sha512sum0l(uint32_t rs1, uint32_t rs2);` | `sha512sum0l` | Zknh (RV32) | |
-| `uint32_t __riscv_sha512sum1h(uint32_t rs1, uint32_t rs2);` | `sha512sum1h` | Zknh (RV32) | |
-| `uint32_t __riscv_sha512sum1l(uint32_t rs1, uint32_t rs2);` | `sha512sum1l` | Zknh (RV32) | |
-| `uint64_t __riscv_sha512sig0(uint64_t rs1);` | `sha512sig0` | Zknh (RV64) | |
-| `uint64_t __riscv_sha512sig1(uint64_t rs1);` | `sha512sig1` | Zknh (RV64) | |
-| `uint64_t __riscv_sha512sum0(uint64_t rs1);` | `sha512sum0` | Zknh (RV64) | |
-| `uint64_t __riscv_sha512sum1(uint64_t rs1);` | `sha512sum1` | Zknh (RV64) | |
-| `uint32_t __riscv_sm3p0(uint32_t rs1);` | `sm3p0` | Zksh | |
-| `uint32_t __riscv_sm3p1(uint32_t rs1);` | `sm3p1` | Zksh | |
-| `uint32_t __riscv_sm4ed(uint32_t rs1, uint32_t rs2, const int bs);` | `sm4ed` | Zksed | `bs`=[0..3] |
-| `uint32_t __riscv_sm4ks(uint32_t rs1, uint32_t rs2, const int bs);` | `sm4ks` | Zksed | `bs`=[0..3] |
+| Prototype | Instruction | Extension | Notes |
+| --------- | ----------- | --------- | ----- |
+| `unsigned __riscv_clz_32(uint32_t x);` | `clz[w]` | Zbb | |
+| `unsigned __riscv_clz_64(uint64_t x);` | `clz` | Zbb (RV64) | |
+| `unsigned __riscv_ctz_32(uint32_t x);` | `ctz[w]` | Zbb | |
+| `unsigned __riscv_ctz_64(uint64_t x);` | `ctz` | Zbb (RV64) | |
+| `unsigned __riscv_cpop_32(uint32_t x);` | `cpop[w]` | Zbb | |
+| `unsigned __riscv_cpop_64(uint64_t x);` | `cpop` | Zbb (RV64) | |
+| `uint32_t __riscv_orc_b_32(uint32_t x);` | `orc.b` | Zbb | Emulated with `orc.b`+`sext.w` on RV64 |
+| `uint64_t __riscv_orc_b_64(uint64_t x);` | `orc.b` | Zbb (RV64) | |
+| `uint32_t __riscv_ror_32(uint32_t x, uint32_t shamt);` | `ror[i][w]` | Zbb, Zbkb | |
+| `uint64_t __riscv_ror_64(uint64_t x, uint32_t shamt);` | `ror[i]` | Zbb, Zbkb (RV64) | |
+| `uint32_t __riscv_rol_32(uint32_t x, uint32_t shamt);` | `rol[w]`/`rori[w]` | Zbb, Zbkb | |
+| `uint64_t __riscv_rol_64(uint64_t x, uint32_t shamt);` | `rol`/`rori` | Zbb, Zbkb (RV64) | |
+| `uint32_t __riscv_rev8_32(uint32_t x);` | `rev8` | Zbb, Zbkb | Emulated with `rev8`+`srai` on RV64 |
+| `uint64_t __riscv_rev8_64(uint64_t x);` | `rev8` | Zbb, Zbkb (RV64) | |
+| `uint32_t __riscv_brev8_32(uint32_t x);` | `brev8` | Zbkb | Emulated with `brev8`+`sext.w` on RV64 |
+| `uint64_t __riscv_brev8_64(uint64_t x);` | `brev8` | Zbkb (RV64) | |
+| `uint32_t __riscv_zip_32(uint32_t x);` | `zip` | Zbkb (RV32) | No emulation for RV64 |
+| `uint32_t __riscv_unzip_32(uint32_t x);` | `unzip` | Zbkb (RV32) | No emulation for RV64 |
+| `uint32_t __riscv_clmul_32(uint32_t rs1, uint32_t rs2);` | `clmul` | Zbc, Zbkc | Emulated with `clmul`+`sext.w` on RV64 |
+| `uint64_t __riscv_clmul_64(uint64_t rs1, uint64_t rs2);` | `clmul` | Zbc, Zbkc (RV64) | |
+| `uint32_t __riscv_clmulh_32(uint32_t rs1, uint32_t rs2);` | `clmulh` | Zbc, Zbkc (RV32) | Emulation on RV64 requires 4-6 instructions |
+| `uint64_t __riscv_clmulh_64(uint64_t rs1, uint64_t rs2);` | `clmulh` | Zbc, Zbkc (RV64) | |
+| `uint32_t __riscv_clmulr_32(uint32_t rs1, uint32_t rs2);` | `clmulr` | Zbc | Emulation on RV64 requires 4-6 instructions |
+| `uint64_t __riscv_clmulr_64(uint64_t rs1, uint64_t rs2);` | `clmulr` | Zbc (RV64) | |
+| `uint32_t __riscv_xperm4_32(uint32_t rs1, uint32_t rs2);` | `xperm4` | Zbkx (RV32) | No emulation for RV64 |
+| `uint64_t __riscv_xperm4_64(uint64_t rs1, uint64_t rs2);` | `xperm4` | Zbkx (RV64) | |
+| `uint32_t __riscv_xperm8_32(uint32_t rs1, uint32_t rs2);` | `xperm8` | Zbkx (RV32) | No emulation for RV64 |
+| `uint64_t __riscv_xperm8_64(uint64_t rs1, uint64_t rs2);` | `xperm8` | Zbkx (RV64) | |
+
+### Scalar Cryptography Extension Intrinsics
+
+In order to access the RISC-V scalar crypto intrinsics, it is necessary to
+include the header file `riscv_crypto.h`.
+
+The functions are only available if the compiler's `-march` string enables
+the required ISA extension. (Calling functions for ISA extensions that are
+not enabled will lead to compile-time and/or link-time errors.)
+
+Unsigned types are used as that is the most logical representation for a
+collection of bits.
+
+Sign extension of 32-bit values on RV64 is not reflected in the interface.
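+
+As an illustrative usage sketch only (not part of this specification), the
+example below calls one of the Zknh intrinsics from the table that follows,
+guarded by the `__riscv_zknh` architecture extension test macro so that the
+code still builds when Zknh is not enabled in `-march`. The helper name
+`sha256_sum0` and the portable fallback are hypothetical.
+
+```c
+#include <stdint.h>
+
+#if defined(__riscv_zknh)
+#include <riscv_crypto.h>
+#endif
+
+/* SHA-256 Sum0, i.e. ROTR(x,2) ^ ROTR(x,13) ^ ROTR(x,22): use the Zknh
+   intrinsic when available, otherwise fall back to plain C rotates. */
+static inline uint32_t sha256_sum0(uint32_t x)
+{
+#if defined(__riscv_zknh)
+    return __riscv_sha256sum0(x);
+#else
+    return ((x >> 2) | (x << 30)) ^
+           ((x >> 13) | (x << 19)) ^
+           ((x >> 22) | (x << 10));
+#endif
+}
+```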
+
+| Prototype | Instruction | Extension | Notes |
+| --------- | ----------- | --------- | ----- |
+| `uint32_t __riscv_aes32dsi(uint32_t rs1, uint32_t rs2, const int bs);` | `aes32dsi` | Zknd (RV32) | `bs`=[0..3] |
+| `uint32_t __riscv_aes32dsmi(uint32_t rs1, uint32_t rs2, const int bs);` | `aes32dsmi` | Zknd (RV32) | `bs`=[0..3] |
+| `uint64_t __riscv_aes64ds(uint64_t rs1, uint64_t rs2);` | `aes64ds` | Zknd (RV64) | |
+| `uint64_t __riscv_aes64dsm(uint64_t rs1, uint64_t rs2);` | `aes64dsm` | Zknd (RV64) | |
+| `uint64_t __riscv_aes64im(uint64_t rs1);` | `aes64im` | Zknd (RV64) | |
+| `uint64_t __riscv_aes64ks1i(uint64_t rs1, const int rnum);` | `aes64ks1i` | Zknd, Zkne (RV64) | `rnum`=[0..10] |
+| `uint64_t __riscv_aes64ks2(uint64_t rs1, uint64_t rs2);` | `aes64ks2` | Zknd, Zkne (RV64) | |
+| `uint32_t __riscv_aes32esi(uint32_t rs1, uint32_t rs2, const int bs);` | `aes32esi` | Zkne (RV32) | `bs`=[0..3] |
+| `uint32_t __riscv_aes32esmi(uint32_t rs1, uint32_t rs2, const int bs);` | `aes32esmi` | Zkne (RV32) | `bs`=[0..3] |
+| `uint64_t __riscv_aes64es(uint64_t rs1, uint64_t rs2);` | `aes64es` | Zkne (RV64) | |
+| `uint64_t __riscv_aes64esm(uint64_t rs1, uint64_t rs2);` | `aes64esm` | Zkne (RV64) | |
+| `uint32_t __riscv_sha256sig0(uint32_t rs1);` | `sha256sig0` | Zknh | |
+| `uint32_t __riscv_sha256sig1(uint32_t rs1);` | `sha256sig1` | Zknh | |
+| `uint32_t __riscv_sha256sum0(uint32_t rs1);` | `sha256sum0` | Zknh | |
+| `uint32_t __riscv_sha256sum1(uint32_t rs1);` | `sha256sum1` | Zknh | |
+| `uint32_t __riscv_sha512sig0h(uint32_t rs1, uint32_t rs2);` | `sha512sig0h` | Zknh (RV32) | |
+| `uint32_t __riscv_sha512sig0l(uint32_t rs1, uint32_t rs2);` | `sha512sig0l` | Zknh (RV32) | |
+| `uint32_t __riscv_sha512sig1h(uint32_t rs1, uint32_t rs2);` | `sha512sig1h` | Zknh (RV32) | |
+| `uint32_t __riscv_sha512sig1l(uint32_t rs1, uint32_t rs2);` | `sha512sig1l` | Zknh (RV32) | |
+| `uint32_t __riscv_sha512sum0h(uint32_t rs1, uint32_t rs2);` | `sha512sum0h` | Zknh (RV32) | |
+| `uint32_t __riscv_sha512sum0l(uint32_t rs1, uint32_t rs2);` | `sha512sum0l` | Zknh (RV32) | |
+| `uint32_t __riscv_sha512sum1h(uint32_t rs1, uint32_t rs2);` | `sha512sum1h` | Zknh (RV32) | |
+| `uint32_t __riscv_sha512sum1l(uint32_t rs1, uint32_t rs2);` | `sha512sum1l` | Zknh (RV32) | |
+| `uint64_t __riscv_sha512sig0(uint64_t rs1);` | `sha512sig0` | Zknh (RV64) | |
+| `uint64_t __riscv_sha512sig1(uint64_t rs1);` | `sha512sig1` | Zknh (RV64) | |
+| `uint64_t __riscv_sha512sum0(uint64_t rs1);` | `sha512sum0` | Zknh (RV64) | |
+| `uint64_t __riscv_sha512sum1(uint64_t rs1);` | `sha512sum1` | Zknh (RV64) | |
+| `uint32_t __riscv_sm3p0(uint32_t rs1);` | `sm3p0` | Zksh | |
+| `uint32_t __riscv_sm3p1(uint32_t rs1);` | `sm3p1` | Zksh | |
+| `uint32_t __riscv_sm4ed(uint32_t rs1, uint32_t rs2, const int bs);` | `sm4ed` | Zksed | `bs`=[0..3] |
+| `uint32_t __riscv_sm4ks(uint32_t rs1, uint32_t rs2, const int bs);` | `sm4ks` | Zksed | `bs`=[0..3] |
 
 ### Cryptography Intrinsics Implementation Guarantees