Skip to content

Commit

Permalink
Update SM4 and GF128
Browse files Browse the repository at this point in the history
Merge sm4_tbox.c into sm4.c, optimize GMUL for AArch64
  • Loading branch information
guanzhi committed Apr 11, 2024
1 parent 2d0378f commit b83ff15
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 258 deletions.
22 changes: 10 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ SET(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake")

include_directories(include)


# Select the table-driven SM4 round function (S-box merged with the linear
# transform). src/sm4.c chooses between the two sm4_encrypt() implementations
# with `#ifdef ENABLE_SM4_TBOX`, so the option has to be forwarded to the
# compiler as a definition; otherwise toggling it has no effect on the build.
option(ENABLE_SM4_TBOX "Enable SM4 merged S-Box implementation" ON)
if (ENABLE_SM4_TBOX)
	message(STATUS "ENABLE_SM4_TBOX is ON")
	add_definitions(-DENABLE_SM4_TBOX)
endif()

# Replace the portable C gf128_mul() with the AArch64 PMULL assembly routine.
# The definition and the extra assembly source are added further down, where
# this option is tested.
option(ENABLE_GMUL_AARCH64 "Enable GF(2^128) Multiplication AArch64 assembly" OFF)



set(src
src/version.c
src/debug.c
Expand Down Expand Up @@ -249,12 +255,11 @@ if (ENABLE_SM2_ALGOR_ID_ENCODE_NULL)
endif()


option(ENABLE_GMUL_ARMV8 "Enable GF(2^128) Multiplication ARMv8 assembly" OFF)
if (ENABLE_GMUL_ARMV8)
message(STATUS "ENABLE_GMUL_ARMV8 is ON")
add_definitions(-DENABLE_GMUL_ARMV8)
if (ENABLE_GMUL_AARCH64)
message(STATUS "ENABLE_GMUL_AARCH64 is ON")
add_definitions(-DENABLE_GMUL_AARCH64)
enable_language(ASM)
list(APPEND src src/gf128_armv8.S)
list(APPEND src src/gf128_aarch64.S)
endif()

option(ENABLE_SM2_Z256_ARMV8 "Enable SM2_Z256 ARMv8 assembly" OFF)
Expand Down Expand Up @@ -302,13 +307,6 @@ if (ENABLE_SM3_SSE)
list(INSERT src ${sm3_index} src/sm3_sse.c)
endif()

option (ENABLE_SM4_TBOX "Enable SM4 merged S-Box implementation" OFF)
if (ENABLE_SM4_TBOX)
message(STATUS "ENABLE_SM4_TBOX is ON")
list(FIND src src/sm4.c sm4_index)
list(REMOVE_AT src ${sm4_index})
list(INSERT src ${sm4_index} src/sm4_tbox.c)
endif()

option(ENABLE_SM4_AARCH64 "Enable SM4 AARCH64 assembly implementation" OFF)
if (ENABLE_SM4_AARCH64)
Expand Down
2 changes: 1 addition & 1 deletion src/gf128.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ gf128_t gf128_add(gf128_t a, gf128_t b)
return r;
}

#ifndef ENABLE_GMUL_ARMV8
#ifndef ENABLE_GMUL_AARCH64
gf128_t gf128_mul(gf128_t a, gf128_t b)
{
gf128_t r = {0, 0};
Expand Down
97 changes: 97 additions & 0 deletions src/gf128_aarch64.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright 2014-2024 The GmSSL Project. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*/


/* GF(2^128) defined by f(x) = x^128 + x^7 + x^2 + x + 1

f0 = x^128 = x^7 + x^2 + x + 1
ext([a0,a1],[b0,b1],8) => [a1,b0]

a * b
= (a0 + a1 * x^64) * (b0 + b1 * x^64)
= a0 * b0 + (a0 * b1 + a1 * b0) * x^64 + a1 * b1 * x^128
= a0 * b0 + ((a0 + a1)*(b0 + b1) - a0*b0 - a1*b1) * x^64 + a1 * b1 * x^128
= c + e * x^64 + d' * x^128
= c + e0 * x^64 + e1 * x^128 + d' * x^128
= c + e0 * x^64 + (d' + e1) * f0
= c + e0 * x^64 + d * f0
= c + e0 * x^64 + (d0 + d1 * x^64) * f0
= c + e0 * x^64 + d0 * f0 + (d1 * f0) * x^64 -- w = d1 * f0
= c + e0 * x^64 + d0 * f0 + (w0 + w1 * x^64) * x^64
= c + e0 * x^64 + d0 * f0 + w0 * x^64 + w1 * x^128
= c + e0 * x^64 + w0 * x^64 + d0 * f0 + w1 * f0
= c + (e0 + w0) * x^64 + (d0 + w1) * f0
*/
.text
// NOTE(review): the symbol is exported with a leading underscore, which
// matches Mach-O (macOS) C symbol naming. On ELF targets a C caller would
// presumably need it spelled `gmul` -- confirm the intended platforms.
.globl _gmul
.align 4
// _gmul: multiply two 128-bit polynomials over GF(2) and reduce the product
// modulo f(x) = x^128 + x^7 + x^2 + x + 1, using one Karatsuba step
// (three 64x64 carry-less multiplies) followed by a two-step folding
// reduction (two more carry-less multiplies).
//
// Register interface, as used by the code below:
//   x0  address the 16-byte result is stored to
//   x1  address the 16-byte operand a = (a0, a1) is loaded from
//   x2  address the 16-byte operand b = (b0, b1) is loaded from
// v0-v7 are overwritten. No stack is used and no other general-purpose
// register is modified.
//
// Lane notation in the comments: (lo, hi) are the lower and upper 64-bit
// lanes of a vector register, and `+` is addition in GF(2), i.e. XOR.
// NOTE(review): no bit or byte reflection is performed here; callers that
// need GCM/GHASH bit ordering must presumably convert beforehand -- confirm
// against the C gf128_mul() this routine replaces.
_gmul:
// load (a0, a1)
ld1 {v1.2d},[x1]
// load (b0, b1)
ld1 {v2.2d},[x2]

// prepare zero
// v0 = (0, 0); used as the zero half in the ext-based lane shifts below
eor v0.16b, v0.16b, v0.16b

// set f(x) = x^7 + x^2 + x + 1 (0x87)
// movi fills every byte with 0x87; the 56-bit right shift of each 64-bit
// lane then leaves 0x87 in the low byte only, so v7 = (0x87, 0x87) = (f0, f0)
movi v7.16b, #0x87
ushr v7.2d, v7.2d, #56

// Multiply: 3*mul + 2*ext + 4*eor

// c = a0 * b0
// v3 = 128-bit product of the lower lanes
pmull v3.1q, v1.1d, v2.1d

// a0 + a1
// ext swaps the halves: v5 = (a1, a0); the XOR gives (a0 + a1) in both lanes
ext v5.16b, v1.16b, v1.16b, #8
eor v5.16b, v5.16b, v1.16b

// d' = a1 * b1
// v4 = 128-bit product of the upper lanes
pmull2 v4.1q, v1.2d, v2.2d

// b0 + b1
// same half-swap and XOR as above: v6 holds (b0 + b1) in both lanes
ext v6.16b, v2.16b, v2.16b, #8
eor v6.16b, v6.16b, v2.16b

// e = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1
// subtraction equals addition in GF(2), so both corrections are XORs
pmull v5.1q, v5.1d, v6.1d
eor v5.16b, v5.16b, v3.16b
eor v5.16b, v5.16b, v4.16b

// Reduce: 2*mul + 3*ext + 5*eor

// d = d' + e1
// v6 = (e1, 0): upper half of e moved down, zero shifted in from v0
ext v6.16b, v5.16b, v0.16b, #8
eor v4.16b, v4.16b, v6.16b

// w = d1 * f0
// product of the upper lanes of d and v7; v6 = (w0, w1)
pmull2 v6.1q, v4.2d, v7.2d

// (e0 + w0) * x^64
// after the XOR v5 = (e0 + w0, e1 + w1); ext with v0 as the low source
// yields (0, e0 + w0), i.e. the lower lane moved up by 64 bits and the
// upper lane (already folded into d above) discarded
eor v5.16b, v5.16b, v6.16b
ext v5.16b, v0.16b, v5.16b, #8

// c = c + (e0 + w0) * x^64
eor v3.16b, v3.16b, v5.16b

// (d0 + w1) * f0
// ext swaps w to (w1, w0) so that w1 lines up with d0 in the lower lane;
// only the lower lanes feed the final multiply
ext v6.16b, v6.16b, v6.16b, #8
eor v4.16b, v4.16b, v6.16b
pmull v4.1q, v4.1d, v7.1d

// c += (d0 + w1) * f0
eor v3.16b, v3.16b, v4.16b

// Output
// store the reduced 128-bit result to the address in x0
st1 {v3.2d}, [x0]

ret


103 changes: 102 additions & 1 deletion src/sm4.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2014-2022 The GmSSL Project. All Rights Reserved.
* Copyright 2014-2024 The GmSSL Project. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
Expand Down Expand Up @@ -144,6 +144,106 @@ void sm4_set_decrypt_key(SM4_KEY *key, const uint8_t user_key[16])
}
}

#ifdef ENABLE_SM4_TBOX
/*
 * 256-entry lookup table used by the table-driven sm4_encrypt() below.
 *
 * The round function indexes it with each of the four bytes of the 32-bit
 * round input: the entry for the most significant byte is used as is, and
 * the entries for the three lower bytes are rotated left by 24, 16 and 8
 * bits respectively before everything is XORed together.
 *
 * NOTE(review): each entry appears to be L(Sbox[x] << 24), i.e. the SM4
 * S-box output placed in the top byte and passed through the linear
 * transform L(B) = B ^ (B <<< 2) ^ (B <<< 10) ^ (B <<< 18) ^ (B <<< 24).
 * This was checked by hand for index 0 (Sbox[0] = 0xd6 -> 0x8ed55b5b) only;
 * confirm against the SM4 specification before relying on it.
 *
 * NOTE(review): the table has external linkage and a very short name. If no
 * other translation unit refers to `T`, `static` would avoid symbol clashes
 * -- confirm before changing.
 */
const uint32_t T[256] = {
0x8ed55b5bU, 0xd0924242U, 0x4deaa7a7U, 0x06fdfbfbU,
0xfccf3333U, 0x65e28787U, 0xc93df4f4U, 0x6bb5dedeU,
0x4e165858U, 0x6eb4dadaU, 0x44145050U, 0xcac10b0bU,
0x8828a0a0U, 0x17f8efefU, 0x9c2cb0b0U, 0x11051414U,
0x872bacacU, 0xfb669d9dU, 0xf2986a6aU, 0xae77d9d9U,
0x822aa8a8U, 0x46bcfafaU, 0x14041010U, 0xcfc00f0fU,
0x02a8aaaaU, 0x54451111U, 0x5f134c4cU, 0xbe269898U,
0x6d482525U, 0x9e841a1aU, 0x1e061818U, 0xfd9b6666U,
0xec9e7272U, 0x4a430909U, 0x10514141U, 0x24f7d3d3U,
0xd5934646U, 0x53ecbfbfU, 0xf89a6262U, 0x927be9e9U,
0xff33ccccU, 0x04555151U, 0x270b2c2cU, 0x4f420d0dU,
0x59eeb7b7U, 0xf3cc3f3fU, 0x1caeb2b2U, 0xea638989U,
0x74e79393U, 0x7fb1ceceU, 0x6c1c7070U, 0x0daba6a6U,
0xedca2727U, 0x28082020U, 0x48eba3a3U, 0xc1975656U,
0x80820202U, 0xa3dc7f7fU, 0xc4965252U, 0x12f9ebebU,
0xa174d5d5U, 0xb38d3e3eU, 0xc33ffcfcU, 0x3ea49a9aU,
0x5b461d1dU, 0x1b071c1cU, 0x3ba59e9eU, 0x0cfff3f3U,
0x3ff0cfcfU, 0xbf72cdcdU, 0x4b175c5cU, 0x52b8eaeaU,
0x8f810e0eU, 0x3d586565U, 0xcc3cf0f0U, 0x7d196464U,
0x7ee59b9bU, 0x91871616U, 0x734e3d3dU, 0x08aaa2a2U,
0xc869a1a1U, 0xc76aadadU, 0x85830606U, 0x7ab0cacaU,
0xb570c5c5U, 0xf4659191U, 0xb2d96b6bU, 0xa7892e2eU,
0x18fbe3e3U, 0x47e8afafU, 0x330f3c3cU, 0x674a2d2dU,
0xb071c1c1U, 0x0e575959U, 0xe99f7676U, 0xe135d4d4U,
0x661e7878U, 0xb4249090U, 0x360e3838U, 0x265f7979U,
0xef628d8dU, 0x38596161U, 0x95d24747U, 0x2aa08a8aU,
0xb1259494U, 0xaa228888U, 0x8c7df1f1U, 0xd73bececU,
0x05010404U, 0xa5218484U, 0x9879e1e1U, 0x9b851e1eU,
0x84d75353U, 0x00000000U, 0x5e471919U, 0x0b565d5dU,
0xe39d7e7eU, 0x9fd04f4fU, 0xbb279c9cU, 0x1a534949U,
0x7c4d3131U, 0xee36d8d8U, 0x0a020808U, 0x7be49f9fU,
0x20a28282U, 0xd4c71313U, 0xe8cb2323U, 0xe69c7a7aU,
0x42e9ababU, 0x43bdfefeU, 0xa2882a2aU, 0x9ad14b4bU,
0x40410101U, 0xdbc41f1fU, 0xd838e0e0U, 0x61b7d6d6U,
0x2fa18e8eU, 0x2bf4dfdfU, 0x3af1cbcbU, 0xf6cd3b3bU,
0x1dfae7e7U, 0xe5608585U, 0x41155454U, 0x25a38686U,
0x60e38383U, 0x16acbabaU, 0x295c7575U, 0x34a69292U,
0xf7996e6eU, 0xe434d0d0U, 0x721a6868U, 0x01545555U,
0x19afb6b6U, 0xdf914e4eU, 0xfa32c8c8U, 0xf030c0c0U,
0x21f6d7d7U, 0xbc8e3232U, 0x75b3c6c6U, 0x6fe08f8fU,
0x691d7474U, 0x2ef5dbdbU, 0x6ae18b8bU, 0x962eb8b8U,
0x8a800a0aU, 0xfe679999U, 0xe2c92b2bU, 0xe0618181U,
0xc0c30303U, 0x8d29a4a4U, 0xaf238c8cU, 0x07a9aeaeU,
0x390d3434U, 0x1f524d4dU, 0x764f3939U, 0xd36ebdbdU,
0x81d65757U, 0xb7d86f6fU, 0xeb37dcdcU, 0x51441515U,
0xa6dd7b7bU, 0x09fef7f7U, 0xb68c3a3aU, 0x932fbcbcU,
0x0f030c0cU, 0x03fcffffU, 0xc26ba9a9U, 0xba73c9c9U,
0xd96cb5b5U, 0xdc6db1b1U, 0x375a6d6dU, 0x15504545U,
0xb98f3636U, 0x771b6c6cU, 0x13adbebeU, 0xda904a4aU,
0x57b9eeeeU, 0xa9de7777U, 0x4cbef2f2U, 0x837efdfdU,
0x55114444U, 0xbdda6767U, 0x2c5d7171U, 0x45400505U,
0x631f7c7cU, 0x50104040U, 0x325b6969U, 0xb8db6363U,
0x220a2828U, 0xc5c20707U, 0xf531c4c4U, 0xa88a2222U,
0x31a79696U, 0xf9ce3737U, 0x977aededU, 0x49bff6f6U,
0x992db4b4U, 0xa475d1d1U, 0x90d34343U, 0x5a124848U,
0x58bae2e2U, 0x71e69797U, 0x64b6d2d2U, 0x70b2c2c2U,
0xad8b2626U, 0xcd68a5a5U, 0xcb955e5eU, 0x624b2929U,
0x3c0c3030U, 0xce945a5aU, 0xab76ddddU, 0x867ff9f9U,
0xf1649595U, 0x5dbbe6e6U, 0x35f2c7c7U, 0x2d092424U,
0xd1c61717U, 0xd66fb9b9U, 0xdec51b1bU, 0x94861212U,
0x78186060U, 0x30f3c3c3U, 0x897cf5f5U, 0x5cefb3b3U,
0xd23ae8e8U, 0xacdf7373U, 0x794c3535U, 0xa0208080U,
0x9d78e5e5U, 0x56edbbbbU, 0x235e7d7dU, 0xc63ef8f8U,
0x8bd45f5fU, 0xe7c82f2fU, 0xdd39e4e4U, 0x68492121U,
};

/*
 * Process one 16-byte block with the table-driven round function.
 *
 * key  round keys; key->rk[0..31] are consumed in ascending order
 * in   16 input bytes, read as four 32-bit words through GETU32
 * out  16 output bytes, written as four 32-bit words through PUTU32
 *
 * Each of the 32 rounds XORs the three newest state words with the round
 * key, looks up every byte of that value in T (rotating the entry so it
 * lines up with the byte's position), XORs the result into the oldest
 * state word, and slides the four-word window forward by one. The final
 * state is written out in reverse word order.
 */
void sm4_encrypt(const SM4_KEY *key, const unsigned char in[16], unsigned char out[16])
{
	uint32_t s0, s1, s2, s3;
	uint32_t mix, fresh;
	int round;

	s0 = GETU32(in);
	s1 = GETU32(in + 4);
	s2 = GETU32(in + 8);
	s3 = GETU32(in + 12);

	for (round = 0; round < 32; round++) {
		/* round input: the three newest words combined with the round key */
		mix = s1 ^ s2 ^ s3 ^ key->rk[round];

		/* one lookup per byte, most significant byte first */
		fresh = T[(mix >> 24) & 0xff];
		fresh ^= ROL32(T[(mix >> 16) & 0xff], 24);
		fresh ^= ROL32(T[(mix >> 8) & 0xff], 16);
		fresh ^= ROL32(T[mix & 0xff], 8);
		fresh ^= s0;

		/* slide the window: drop the oldest word, append the new one */
		s0 = s1;
		s1 = s2;
		s2 = s3;
		s3 = fresh;
	}

	/* the output is the final state in reverse word order */
	PUTU32(out, s3);
	PUTU32(out + 4, s2);
	PUTU32(out + 8, s1);
	PUTU32(out + 12, s0);
}
#else
void sm4_encrypt(const SM4_KEY *key, const unsigned char in[16], unsigned char out[16])
{
uint32_t X0, X1, X2, X3, X4;
Expand Down Expand Up @@ -171,3 +271,4 @@ void sm4_encrypt(const SM4_KEY *key, const unsigned char in[16], unsigned char o
PUTU32(out + 8, X1);
PUTU32(out + 12, X0);
}
#endif // ENABLE_SM4_TBOX
Loading

0 comments on commit b83ff15

Please sign in to comment.