-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge sm4_tbox.c into sm4.c, optimize GMUL for AArch64
- Loading branch information
Showing
5 changed files
with
210 additions
and
258 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/*
 * Copyright 2014-2024 The GmSSL Project. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 */
|
||
|
||
/* GF(2^128) defined by f(x) = x^128 + x^7 + x^2 + x + 1

f0 = x^128 mod f(x) = x^7 + x^2 + x + 1
ext([a0,a1],[b0,b1],8) => [a1,b0]

Coefficients are in GF(2), so "+" and "-" below are both XOR.

a * b
= (a0 + a1 * x^64) * (b0 + b1 * x^64)
= a0 * b0 + (a0 * b1 + a1 * b0) * x^64 + a1 * b1 * x^128
= a0 * b0 + ((a0 + a1)*(b0 + b1) - a0*b0 - a1*b1) * x^64 + a1 * b1 * x^128
= c + e * x^64 + d' * x^128			-- c = a0 * b0, d' = a1 * b1
= c + e0 * x^64 + e1 * x^128 + d' * x^128
= c + e0 * x^64 + (d' + e1) * f0
= c + e0 * x^64 + d * f0			-- d = d' + e1
= c + e0 * x^64 + (d0 + d1 * x^64) * f0
= c + e0 * x^64 + d0 * f0 + (d1 * f0) * x^64	-- w = d1 * f0
= c + e0 * x^64 + d0 * f0 + (w0 + w1 * x^64) * x^64
= c + e0 * x^64 + d0 * f0 + w0 * x^64 + w1 * x^128
= c + e0 * x^64 + w0 * x^64 + d0 * f0 + w1 * f0
= c + (e0 + w0) * x^64 + (d0 + w1) * f0
*/
//-----------------------------------------------------------------------
// _gmul: multiply two 128-bit polynomials over GF(2) and reduce the
//        256-bit product modulo f(x) = x^128 + x^7 + x^2 + x + 1.
//
// Syntax: GNU as / clang, AArch64 (A64).
// In:     x0 = address that receives the 16-byte result
//         x1 = address of the 16-byte operand a = (a0, a1)
//         x2 = address of the 16-byte operand b = (b0, b1)
//         Lane 0 of each vector is the low half (x^0..x^63), lane 1 the
//         high half (x^64..x^127).
// Out:    16 bytes stored at [x0]; no register return value.
// Clobb:  v0-v7. No general-purpose register, flag or stack usage.
// Needs:  PMULL/PMULL2 with a .1q result (64x64 -> 128 polynomial
//         multiply), i.e. the AArch64 PMULL crypto extension.
// NOTE(review): the leading underscore looks like the Mach-O name of a
//         C function gmul(r, a, b) - confirm against the C prototype.
//
// Register roles:
//         v0 = 0                 v4 = d' = a1*b1, then d = d' + e1
//         v1 = a                 v5 = a0 + a1, then e, then (e0+w0)*x^64
//         v2 = b                 v6 = b0 + b1, then scratch / w
//         v3 = c, accumulator    v7 = (f0, f0), f0 = 0x87
//-----------------------------------------------------------------------
        .text
        .globl  _gmul
        .p2align 4
_gmul:
        ld1     {v1.2d}, [x1]                   // v1 = (a0, a1)
        ld1     {v2.2d}, [x2]                   // v2 = (b0, b1)

        // v0 = 0. movi does not read the previous contents of v0.
        movi    v0.16b, #0

        // v7 = (0x87, 0x87): fill every byte with 0x87, then shift each
        // 64-bit lane right by 56 so only the low byte survives.
        // 0x87 encodes f0 = x^7 + x^2 + x + 1.
        movi    v7.16b, #0x87
        ushr    v7.2d, v7.2d, #56

        // ---- Multiply (Karatsuba): 3*pmull + 2*ext + 4*eor ----

        // c = a0 * b0
        pmull   v3.1q, v1.1d, v2.1d

        // lane 0 of v5 = a0 + a1 (swap the halves, then xor)
        ext     v5.16b, v1.16b, v1.16b, #8
        eor     v5.16b, v5.16b, v1.16b

        // d' = a1 * b1
        pmull2  v4.1q, v1.2d, v2.2d

        // lane 0 of v6 = b0 + b1
        ext     v6.16b, v2.16b, v2.16b, #8
        eor     v6.16b, v6.16b, v2.16b

        // e = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1
        pmull   v5.1q, v5.1d, v6.1d
        eor     v5.16b, v5.16b, v3.16b
        eor     v5.16b, v5.16b, v4.16b

        // ---- Reduce: 2*pmull + 3*ext + 5*eor ----

        // d = d' + e1            (v6 = (e1, 0))
        ext     v6.16b, v5.16b, v0.16b, #8
        eor     v4.16b, v4.16b, v6.16b

        // w = d1 * f0
        pmull2  v6.1q, v4.2d, v7.2d

        // (e0 + w0) * x^64: xor, then move lane 0 up into lane 1 and
        // clear lane 0 (this also discards e1 + w1).
        eor     v5.16b, v5.16b, v6.16b
        ext     v5.16b, v0.16b, v5.16b, #8

        // c = c + (e0 + w0) * x^64
        eor     v3.16b, v3.16b, v5.16b

        // (d0 + w1) * f0         (swap w so that w1 lines up with d0)
        ext     v6.16b, v6.16b, v6.16b, #8
        eor     v4.16b, v4.16b, v6.16b
        pmull   v4.1q, v4.1d, v7.1d

        // c += (d0 + w1) * f0
        eor     v3.16b, v3.16b, v4.16b

        // Output
        st1     {v3.2d}, [x0]

        ret
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.