From fef98f11b9bcf87aaf5957865e049d8d2abc1d54 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Mon, 10 Mar 2025 21:06:22 -0700 Subject: [PATCH 1/2] Actually test more asm functions --- fiat-amd64/gentest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fiat-amd64/gentest.py b/fiat-amd64/gentest.py index ee50d4ce1b..45ad901d22 100755 --- a/fiat-amd64/gentest.py +++ b/fiat-amd64/gentest.py @@ -46,7 +46,7 @@ def removeprefix(s, prefix): asm_op_names = OrderedDict() -regex = re.compile(r'fiat_(?P<name>[^_]+(_(solinas|montgomery|dettman))?)_(?P<op>(carry_)?(square|mul))') +regex = re.compile(r'fiat_(?P<name>[^_]+(_(solinas|montgomery|dettman))?)_(?P<op>(carry_)?(square|mul|from_bytes|to_bytes|add|sub|opp))') for dirname in directories: m = regex.match(os.path.basename(dirname)) if m: @@ -74,6 +74,9 @@ def asm_op_names_key(val): kind = 3 n, prime = solinasprimes[name] + else: + assert False, name + return (kind, n, prime, op, name, fnames) def is_small(val): From 6eb838f7bf2cd796147aaae11997c1d1238757f5 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Tue, 11 Mar 2025 12:10:46 -0700 Subject: [PATCH 2/2] Add from_bytes asm tests Reverts #2045 Redux of #2043 This time actually testing them --- .../clang_19_1_0_O0.asm | 274 +++++++ .../fiat_curve25519_from_bytes/gcc_14_1_0.asm | 366 +++++++++ .../fiat_p224_from_bytes/clang_19_1_0_O0.asm | 206 +++++ .../fiat_p224_from_bytes/gcc_14_1_0.asm | 280 +++++++ .../fiat_p256_from_bytes/clang_19_1_0_O0.asm | 234 ++++++ .../fiat_p256_from_bytes/gcc_14_1_0.asm | 320 ++++++++ .../clang_19_1_0_O0.asm | 402 ++++++++++ .../gcc_14_1_0.asm | 552 +++++++++++++ .../fiat_p521_from_bytes/clang_19_1_0_O0.asm | 532 +++++++++++++ .../fiat_p521_from_bytes/gcc_14_1_0.asm | 722 ++++++++++++++++++ .../clang_19_1_0_O0.asm | 149 ++++ .../fiat_poly1305_from_bytes/gcc_14_1_0.asm | 196 +++++ 12 files changed, 4233 insertions(+) create mode 100644
fiat-amd64/fiat_curve25519_from_bytes/gcc_14_1_0.asm create mode 100644 fiat-amd64/fiat_p224_from_bytes/clang_19_1_0_O0.asm create mode 100644 fiat-amd64/fiat_p224_from_bytes/gcc_14_1_0.asm create mode 100644 fiat-amd64/fiat_p256_from_bytes/clang_19_1_0_O0.asm create mode 100644 fiat-amd64/fiat_p256_from_bytes/gcc_14_1_0.asm create mode 100644 fiat-amd64/fiat_p448_solinas_from_bytes/clang_19_1_0_O0.asm create mode 100644 fiat-amd64/fiat_p448_solinas_from_bytes/gcc_14_1_0.asm create mode 100644 fiat-amd64/fiat_p521_from_bytes/clang_19_1_0_O0.asm create mode 100644 fiat-amd64/fiat_p521_from_bytes/gcc_14_1_0.asm create mode 100644 fiat-amd64/fiat_poly1305_from_bytes/clang_19_1_0_O0.asm create mode 100644 fiat-amd64/fiat_poly1305_from_bytes/gcc_14_1_0.asm diff --git a/fiat-amd64/fiat_curve25519_from_bytes/clang_19_1_0_O0.asm b/fiat-amd64/fiat_curve25519_from_bytes/clang_19_1_0_O0.asm new file mode 100644 index 0000000000..08ec01f510 --- /dev/null +++ b/fiat-amd64/fiat_curve25519_from_bytes/clang_19_1_0_O0.asm @@ -0,0 +1,274 @@ + .globl _Z21fiat_25519_from_bytesPmPKh +_Z21fiat_25519_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 456 + mov qword ptr [rbp - 8], rdi + mov qword ptr [rbp - 16], rsi + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 31] + shl rax, 44 + mov qword ptr [rbp - 24], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 30] + shl rax, 36 + mov qword ptr [rbp - 32], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 29] + shl rax, 28 + mov qword ptr [rbp - 40], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 28] + shl rax, 20 + mov qword ptr [rbp - 48], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 27] + shl rax, 12 + mov qword ptr [rbp - 56], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 26] + shl rax, 4 + mov qword ptr [rbp - 64], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 25] + shl rax, 47 + mov qword ptr [rbp - 72], rax + mov 
rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 24] + shl rax, 39 + mov qword ptr [rbp - 80], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 23] + shl rax, 31 + mov qword ptr [rbp - 88], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 22] + shl rax, 23 + mov qword ptr [rbp - 96], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 21] + shl rax, 15 + mov qword ptr [rbp - 104], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 20] + shl rax, 7 + mov qword ptr [rbp - 112], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 19] + shl rax, 50 + mov qword ptr [rbp - 120], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 18] + shl rax, 42 + mov qword ptr [rbp - 128], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 17] + shl rax, 34 + mov qword ptr [rbp - 136], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 16] + shl rax, 26 + mov qword ptr [rbp - 144], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 15] + shl rax, 18 + mov qword ptr [rbp - 152], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 14] + shl rax, 10 + mov qword ptr [rbp - 160], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 13] + shl rax, 2 + mov qword ptr [rbp - 168], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 12] + shl rax, 45 + mov qword ptr [rbp - 176], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 11] + shl rax, 37 + mov qword ptr [rbp - 184], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 10] + shl rax, 29 + mov qword ptr [rbp - 192], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 9] + shl rax, 21 + mov qword ptr [rbp - 200], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 8] + shl rax, 13 + mov qword ptr [rbp - 208], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 7] + shl rax, 5 + mov qword ptr [rbp - 216], 
rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 6] + shl rax, 48 + mov qword ptr [rbp - 224], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 5] + shl rax, 40 + mov qword ptr [rbp - 232], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 4] + shl rax, 32 + mov qword ptr [rbp - 240], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 3] + shl rax, 24 + mov qword ptr [rbp - 248], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 2] + shl rax, 16 + mov qword ptr [rbp - 256], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 1] + shl rax, 8 + mov qword ptr [rbp - 264], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax] + mov byte ptr [rbp - 265], al + mov rax, qword ptr [rbp - 264] + movzx ecx, byte ptr [rbp - 265] + add rax, rcx + mov qword ptr [rbp - 280], rax + mov rax, qword ptr [rbp - 256] + add rax, qword ptr [rbp - 280] + mov qword ptr [rbp - 288], rax + mov rax, qword ptr [rbp - 248] + add rax, qword ptr [rbp - 288] + mov qword ptr [rbp - 296], rax + mov rax, qword ptr [rbp - 240] + add rax, qword ptr [rbp - 296] + mov qword ptr [rbp - 304], rax + mov rax, qword ptr [rbp - 232] + add rax, qword ptr [rbp - 304] + mov qword ptr [rbp - 312], rax + mov rax, qword ptr [rbp - 224] + add rax, qword ptr [rbp - 312] + mov qword ptr [rbp - 320], rax + movabs rax, 2251799813685247 + and rax, qword ptr [rbp - 320] + mov qword ptr [rbp - 328], rax + mov rax, qword ptr [rbp - 320] + shr rax, 51 + mov byte ptr [rbp - 329], al + mov rax, qword ptr [rbp - 216] + movzx ecx, byte ptr [rbp - 329] + add rax, rcx + mov qword ptr [rbp - 344], rax + mov rax, qword ptr [rbp - 208] + add rax, qword ptr [rbp - 344] + mov qword ptr [rbp - 352], rax + mov rax, qword ptr [rbp - 200] + add rax, qword ptr [rbp - 352] + mov qword ptr [rbp - 360], rax + mov rax, qword ptr [rbp - 192] + add rax, qword ptr [rbp - 360] + mov qword ptr [rbp - 368], rax + mov rax, qword ptr [rbp - 184] + add rax, 
qword ptr [rbp - 368] + mov qword ptr [rbp - 376], rax + mov rax, qword ptr [rbp - 176] + add rax, qword ptr [rbp - 376] + mov qword ptr [rbp - 384], rax + movabs rax, 2251799813685247 + and rax, qword ptr [rbp - 384] + mov qword ptr [rbp - 392], rax + mov rax, qword ptr [rbp - 384] + shr rax, 51 + mov byte ptr [rbp - 393], al + mov rax, qword ptr [rbp - 168] + movzx ecx, byte ptr [rbp - 393] + add rax, rcx + mov qword ptr [rbp - 408], rax + mov rax, qword ptr [rbp - 160] + add rax, qword ptr [rbp - 408] + mov qword ptr [rbp - 416], rax + mov rax, qword ptr [rbp - 152] + add rax, qword ptr [rbp - 416] + mov qword ptr [rbp - 424], rax + mov rax, qword ptr [rbp - 144] + add rax, qword ptr [rbp - 424] + mov qword ptr [rbp - 432], rax + mov rax, qword ptr [rbp - 136] + add rax, qword ptr [rbp - 432] + mov qword ptr [rbp - 440], rax + mov rax, qword ptr [rbp - 128] + add rax, qword ptr [rbp - 440] + mov qword ptr [rbp - 448], rax + mov rax, qword ptr [rbp - 120] + add rax, qword ptr [rbp - 448] + mov qword ptr [rbp - 456], rax + movabs rax, 2251799813685247 + and rax, qword ptr [rbp - 456] + mov qword ptr [rbp - 464], rax + mov rax, qword ptr [rbp - 456] + shr rax, 51 + mov byte ptr [rbp - 465], al + mov rax, qword ptr [rbp - 112] + movzx ecx, byte ptr [rbp - 465] + add rax, rcx + mov qword ptr [rbp - 480], rax + mov rax, qword ptr [rbp - 104] + add rax, qword ptr [rbp - 480] + mov qword ptr [rbp - 488], rax + mov rax, qword ptr [rbp - 96] + add rax, qword ptr [rbp - 488] + mov qword ptr [rbp - 496], rax + mov rax, qword ptr [rbp - 88] + add rax, qword ptr [rbp - 496] + mov qword ptr [rbp - 504], rax + mov rax, qword ptr [rbp - 80] + add rax, qword ptr [rbp - 504] + mov qword ptr [rbp - 512], rax + mov rax, qword ptr [rbp - 72] + add rax, qword ptr [rbp - 512] + mov qword ptr [rbp - 520], rax + movabs rax, 2251799813685247 + and rax, qword ptr [rbp - 520] + mov qword ptr [rbp - 528], rax + mov rax, qword ptr [rbp - 520] + shr rax, 51 + mov byte ptr [rbp - 529], al + mov 
rax, qword ptr [rbp - 64] + movzx ecx, byte ptr [rbp - 529] + add rax, rcx + mov qword ptr [rbp - 544], rax + mov rax, qword ptr [rbp - 56] + add rax, qword ptr [rbp - 544] + mov qword ptr [rbp - 552], rax + mov rax, qword ptr [rbp - 48] + add rax, qword ptr [rbp - 552] + mov qword ptr [rbp - 560], rax + mov rax, qword ptr [rbp - 40] + add rax, qword ptr [rbp - 560] + mov qword ptr [rbp - 568], rax + mov rax, qword ptr [rbp - 32] + add rax, qword ptr [rbp - 568] + mov qword ptr [rbp - 576], rax + mov rax, qword ptr [rbp - 24] + add rax, qword ptr [rbp - 576] + mov qword ptr [rbp - 584], rax + mov rcx, qword ptr [rbp - 328] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax], rcx + mov rcx, qword ptr [rbp - 392] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 8], rcx + mov rcx, qword ptr [rbp - 464] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 16], rcx + mov rcx, qword ptr [rbp - 528] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 24], rcx + mov rcx, qword ptr [rbp - 584] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 32], rcx + add rsp, 456 + pop rbp + ret \ No newline at end of file diff --git a/fiat-amd64/fiat_curve25519_from_bytes/gcc_14_1_0.asm b/fiat-amd64/fiat_curve25519_from_bytes/gcc_14_1_0.asm new file mode 100644 index 0000000000..bad5b90848 --- /dev/null +++ b/fiat-amd64/fiat_curve25519_from_bytes/gcc_14_1_0.asm @@ -0,0 +1,366 @@ + .globl _Z21fiat_25519_from_bytesPmPKh +_Z21fiat_25519_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 472 + mov QWORD PTR [rbp-584], rdi + mov QWORD PTR [rbp-592], rsi + mov rax, QWORD PTR [rbp-592] + add rax, 31 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 44 + mov QWORD PTR [rbp-8], rax + mov rax, QWORD PTR [rbp-592] + add rax, 30 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 36 + mov QWORD PTR [rbp-16], rax + mov rax, QWORD PTR [rbp-592] + add rax, 29 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 28 + mov QWORD PTR [rbp-24], rax + mov rax, QWORD PTR [rbp-592] + 
add rax, 28 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 20 + mov QWORD PTR [rbp-32], rax + mov rax, QWORD PTR [rbp-592] + add rax, 27 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 12 + mov QWORD PTR [rbp-40], rax + mov rax, QWORD PTR [rbp-592] + add rax, 26 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 4 + mov QWORD PTR [rbp-48], rax + mov rax, QWORD PTR [rbp-592] + add rax, 25 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 47 + mov QWORD PTR [rbp-56], rax + mov rax, QWORD PTR [rbp-592] + add rax, 24 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 39 + mov QWORD PTR [rbp-64], rax + mov rax, QWORD PTR [rbp-592] + add rax, 23 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 31 + mov QWORD PTR [rbp-72], rax + mov rax, QWORD PTR [rbp-592] + add rax, 22 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 23 + mov QWORD PTR [rbp-80], rax + mov rax, QWORD PTR [rbp-592] + add rax, 21 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 15 + mov QWORD PTR [rbp-88], rax + mov rax, QWORD PTR [rbp-592] + add rax, 20 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 7 + mov QWORD PTR [rbp-96], rax + mov rax, QWORD PTR [rbp-592] + add rax, 19 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 50 + mov QWORD PTR [rbp-104], rax + mov rax, QWORD PTR [rbp-592] + add rax, 18 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 42 + mov QWORD PTR [rbp-112], rax + mov rax, QWORD PTR [rbp-592] + add rax, 17 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 34 + mov QWORD PTR [rbp-120], rax + mov rax, QWORD PTR [rbp-592] + add rax, 16 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 26 + mov QWORD PTR [rbp-128], rax + mov rax, QWORD PTR [rbp-592] + add rax, 15 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 18 + mov QWORD PTR [rbp-136], rax + mov rax, QWORD PTR [rbp-592] + add rax, 14 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 10 + mov QWORD PTR [rbp-144], rax + mov rax, QWORD PTR [rbp-592] + 
add rax, 13 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 2 + mov QWORD PTR [rbp-152], rax + mov rax, QWORD PTR [rbp-592] + add rax, 12 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 45 + mov QWORD PTR [rbp-160], rax + mov rax, QWORD PTR [rbp-592] + add rax, 11 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 37 + mov QWORD PTR [rbp-168], rax + mov rax, QWORD PTR [rbp-592] + add rax, 10 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 29 + mov QWORD PTR [rbp-176], rax + mov rax, QWORD PTR [rbp-592] + add rax, 9 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 21 + mov QWORD PTR [rbp-184], rax + mov rax, QWORD PTR [rbp-592] + add rax, 8 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 13 + mov QWORD PTR [rbp-192], rax + mov rax, QWORD PTR [rbp-592] + add rax, 7 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 5 + mov QWORD PTR [rbp-200], rax + mov rax, QWORD PTR [rbp-592] + add rax, 6 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-208], rax + mov rax, QWORD PTR [rbp-592] + add rax, 5 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-216], rax + mov rax, QWORD PTR [rbp-592] + add rax, 4 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-224], rax + mov rax, QWORD PTR [rbp-592] + add rax, 3 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-232], rax + mov rax, QWORD PTR [rbp-592] + add rax, 2 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-240], rax + mov rax, QWORD PTR [rbp-592] + add rax, 1 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-248], rax + mov rax, QWORD PTR [rbp-592] + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-249], al + movzx edx, BYTE PTR [rbp-249] + mov rax, QWORD PTR [rbp-248] + add rax, rdx + mov QWORD PTR [rbp-264], rax + mov rdx, QWORD PTR [rbp-240] + mov rax, QWORD PTR [rbp-264] + add rax, rdx + mov QWORD PTR [rbp-272], 
rax + mov rdx, QWORD PTR [rbp-232] + mov rax, QWORD PTR [rbp-272] + add rax, rdx + mov QWORD PTR [rbp-280], rax + mov rdx, QWORD PTR [rbp-224] + mov rax, QWORD PTR [rbp-280] + add rax, rdx + mov QWORD PTR [rbp-288], rax + mov rdx, QWORD PTR [rbp-216] + mov rax, QWORD PTR [rbp-288] + add rax, rdx + mov QWORD PTR [rbp-296], rax + mov rdx, QWORD PTR [rbp-208] + mov rax, QWORD PTR [rbp-296] + add rax, rdx + mov QWORD PTR [rbp-304], rax + movabs rax, 2251799813685247 + and rax, QWORD PTR [rbp-304] + mov QWORD PTR [rbp-312], rax + mov rax, QWORD PTR [rbp-304] + shr rax, 51 + mov BYTE PTR [rbp-313], al + movzx edx, BYTE PTR [rbp-313] + mov rax, QWORD PTR [rbp-200] + add rax, rdx + mov QWORD PTR [rbp-328], rax + mov rdx, QWORD PTR [rbp-192] + mov rax, QWORD PTR [rbp-328] + add rax, rdx + mov QWORD PTR [rbp-336], rax + mov rdx, QWORD PTR [rbp-184] + mov rax, QWORD PTR [rbp-336] + add rax, rdx + mov QWORD PTR [rbp-344], rax + mov rdx, QWORD PTR [rbp-176] + mov rax, QWORD PTR [rbp-344] + add rax, rdx + mov QWORD PTR [rbp-352], rax + mov rdx, QWORD PTR [rbp-168] + mov rax, QWORD PTR [rbp-352] + add rax, rdx + mov QWORD PTR [rbp-360], rax + mov rdx, QWORD PTR [rbp-160] + mov rax, QWORD PTR [rbp-360] + add rax, rdx + mov QWORD PTR [rbp-368], rax + movabs rax, 2251799813685247 + and rax, QWORD PTR [rbp-368] + mov QWORD PTR [rbp-376], rax + mov rax, QWORD PTR [rbp-368] + shr rax, 51 + mov BYTE PTR [rbp-377], al + movzx edx, BYTE PTR [rbp-377] + mov rax, QWORD PTR [rbp-152] + add rax, rdx + mov QWORD PTR [rbp-392], rax + mov rdx, QWORD PTR [rbp-144] + mov rax, QWORD PTR [rbp-392] + add rax, rdx + mov QWORD PTR [rbp-400], rax + mov rdx, QWORD PTR [rbp-136] + mov rax, QWORD PTR [rbp-400] + add rax, rdx + mov QWORD PTR [rbp-408], rax + mov rdx, QWORD PTR [rbp-128] + mov rax, QWORD PTR [rbp-408] + add rax, rdx + mov QWORD PTR [rbp-416], rax + mov rdx, QWORD PTR [rbp-120] + mov rax, QWORD PTR [rbp-416] + add rax, rdx + mov QWORD PTR [rbp-424], rax + mov rdx, QWORD PTR [rbp-112] + mov 
rax, QWORD PTR [rbp-424] + add rax, rdx + mov QWORD PTR [rbp-432], rax + mov rdx, QWORD PTR [rbp-104] + mov rax, QWORD PTR [rbp-432] + add rax, rdx + mov QWORD PTR [rbp-440], rax + movabs rax, 2251799813685247 + and rax, QWORD PTR [rbp-440] + mov QWORD PTR [rbp-448], rax + mov rax, QWORD PTR [rbp-440] + shr rax, 51 + mov BYTE PTR [rbp-449], al + movzx edx, BYTE PTR [rbp-449] + mov rax, QWORD PTR [rbp-96] + add rax, rdx + mov QWORD PTR [rbp-464], rax + mov rdx, QWORD PTR [rbp-88] + mov rax, QWORD PTR [rbp-464] + add rax, rdx + mov QWORD PTR [rbp-472], rax + mov rdx, QWORD PTR [rbp-80] + mov rax, QWORD PTR [rbp-472] + add rax, rdx + mov QWORD PTR [rbp-480], rax + mov rdx, QWORD PTR [rbp-72] + mov rax, QWORD PTR [rbp-480] + add rax, rdx + mov QWORD PTR [rbp-488], rax + mov rdx, QWORD PTR [rbp-64] + mov rax, QWORD PTR [rbp-488] + add rax, rdx + mov QWORD PTR [rbp-496], rax + mov rdx, QWORD PTR [rbp-56] + mov rax, QWORD PTR [rbp-496] + add rax, rdx + mov QWORD PTR [rbp-504], rax + movabs rax, 2251799813685247 + and rax, QWORD PTR [rbp-504] + mov QWORD PTR [rbp-512], rax + mov rax, QWORD PTR [rbp-504] + shr rax, 51 + mov BYTE PTR [rbp-513], al + movzx edx, BYTE PTR [rbp-513] + mov rax, QWORD PTR [rbp-48] + add rax, rdx + mov QWORD PTR [rbp-528], rax + mov rdx, QWORD PTR [rbp-40] + mov rax, QWORD PTR [rbp-528] + add rax, rdx + mov QWORD PTR [rbp-536], rax + mov rdx, QWORD PTR [rbp-32] + mov rax, QWORD PTR [rbp-536] + add rax, rdx + mov QWORD PTR [rbp-544], rax + mov rdx, QWORD PTR [rbp-24] + mov rax, QWORD PTR [rbp-544] + add rax, rdx + mov QWORD PTR [rbp-552], rax + mov rdx, QWORD PTR [rbp-16] + mov rax, QWORD PTR [rbp-552] + add rax, rdx + mov QWORD PTR [rbp-560], rax + mov rdx, QWORD PTR [rbp-8] + mov rax, QWORD PTR [rbp-560] + add rax, rdx + mov QWORD PTR [rbp-568], rax + mov rax, QWORD PTR [rbp-584] + mov rdx, QWORD PTR [rbp-312] + mov QWORD PTR [rax], rdx + mov rax, QWORD PTR [rbp-584] + lea rdx, [rax+8] + mov rax, QWORD PTR [rbp-376] + mov QWORD PTR [rdx], rax + 
mov rax, QWORD PTR [rbp-584] + lea rdx, [rax+16] + mov rax, QWORD PTR [rbp-448] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-584] + lea rdx, [rax+24] + mov rax, QWORD PTR [rbp-512] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-584] + lea rdx, [rax+32] + mov rax, QWORD PTR [rbp-568] + mov QWORD PTR [rdx], rax + nop + leave + ret diff --git a/fiat-amd64/fiat_p224_from_bytes/clang_19_1_0_O0.asm b/fiat-amd64/fiat_p224_from_bytes/clang_19_1_0_O0.asm new file mode 100644 index 0000000000..c1e17c203a --- /dev/null +++ b/fiat-amd64/fiat_p224_from_bytes/clang_19_1_0_O0.asm @@ -0,0 +1,206 @@ + .globl _Z20fiat_p224_from_bytesPmPKh +_Z20fiat_p224_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 304 + mov qword ptr [rbp - 8], rdi + mov qword ptr [rbp - 16], rsi + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 27] + shl rax, 24 + mov qword ptr [rbp - 24], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 26] + shl rax, 16 + mov qword ptr [rbp - 32], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 25] + shl rax, 8 + mov qword ptr [rbp - 40], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 24] + mov byte ptr [rbp - 41], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 23] + shl rax, 56 + mov qword ptr [rbp - 56], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 22] + shl rax, 48 + mov qword ptr [rbp - 64], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 21] + shl rax, 40 + mov qword ptr [rbp - 72], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 20] + shl rax, 32 + mov qword ptr [rbp - 80], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 19] + shl rax, 24 + mov qword ptr [rbp - 88], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 18] + shl rax, 16 + mov qword ptr [rbp - 96], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 17] + shl rax, 8 + mov qword ptr [rbp - 104], rax + mov rax, qword 
ptr [rbp - 16] + mov al, byte ptr [rax + 16] + mov byte ptr [rbp - 105], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 15] + shl rax, 56 + mov qword ptr [rbp - 120], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 14] + shl rax, 48 + mov qword ptr [rbp - 128], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 13] + shl rax, 40 + mov qword ptr [rbp - 136], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 12] + shl rax, 32 + mov qword ptr [rbp - 144], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 11] + shl rax, 24 + mov qword ptr [rbp - 152], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 10] + shl rax, 16 + mov qword ptr [rbp - 160], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 9] + shl rax, 8 + mov qword ptr [rbp - 168], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 8] + mov byte ptr [rbp - 169], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 7] + shl rax, 56 + mov qword ptr [rbp - 184], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 6] + shl rax, 48 + mov qword ptr [rbp - 192], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 5] + shl rax, 40 + mov qword ptr [rbp - 200], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 4] + shl rax, 32 + mov qword ptr [rbp - 208], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 3] + shl rax, 24 + mov qword ptr [rbp - 216], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 2] + shl rax, 16 + mov qword ptr [rbp - 224], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 1] + shl rax, 8 + mov qword ptr [rbp - 232], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax] + mov byte ptr [rbp - 233], al + mov rax, qword ptr [rbp - 232] + movzx ecx, byte ptr [rbp - 233] + add rax, rcx + mov qword ptr [rbp - 248], rax + mov rax, qword ptr [rbp - 224] + add rax, qword ptr [rbp - 248] + 
mov qword ptr [rbp - 256], rax + mov rax, qword ptr [rbp - 216] + add rax, qword ptr [rbp - 256] + mov qword ptr [rbp - 264], rax + mov rax, qword ptr [rbp - 208] + add rax, qword ptr [rbp - 264] + mov qword ptr [rbp - 272], rax + mov rax, qword ptr [rbp - 200] + add rax, qword ptr [rbp - 272] + mov qword ptr [rbp - 280], rax + mov rax, qword ptr [rbp - 192] + add rax, qword ptr [rbp - 280] + mov qword ptr [rbp - 288], rax + mov rax, qword ptr [rbp - 184] + add rax, qword ptr [rbp - 288] + mov qword ptr [rbp - 296], rax + mov rax, qword ptr [rbp - 168] + movzx ecx, byte ptr [rbp - 169] + add rax, rcx + mov qword ptr [rbp - 304], rax + mov rax, qword ptr [rbp - 160] + add rax, qword ptr [rbp - 304] + mov qword ptr [rbp - 312], rax + mov rax, qword ptr [rbp - 152] + add rax, qword ptr [rbp - 312] + mov qword ptr [rbp - 320], rax + mov rax, qword ptr [rbp - 144] + add rax, qword ptr [rbp - 320] + mov qword ptr [rbp - 328], rax + mov rax, qword ptr [rbp - 136] + add rax, qword ptr [rbp - 328] + mov qword ptr [rbp - 336], rax + mov rax, qword ptr [rbp - 128] + add rax, qword ptr [rbp - 336] + mov qword ptr [rbp - 344], rax + mov rax, qword ptr [rbp - 120] + add rax, qword ptr [rbp - 344] + mov qword ptr [rbp - 352], rax + mov rax, qword ptr [rbp - 104] + movzx ecx, byte ptr [rbp - 105] + add rax, rcx + mov qword ptr [rbp - 360], rax + mov rax, qword ptr [rbp - 96] + add rax, qword ptr [rbp - 360] + mov qword ptr [rbp - 368], rax + mov rax, qword ptr [rbp - 88] + add rax, qword ptr [rbp - 368] + mov qword ptr [rbp - 376], rax + mov rax, qword ptr [rbp - 80] + add rax, qword ptr [rbp - 376] + mov qword ptr [rbp - 384], rax + mov rax, qword ptr [rbp - 72] + add rax, qword ptr [rbp - 384] + mov qword ptr [rbp - 392], rax + mov rax, qword ptr [rbp - 64] + add rax, qword ptr [rbp - 392] + mov qword ptr [rbp - 400], rax + mov rax, qword ptr [rbp - 56] + add rax, qword ptr [rbp - 400] + mov qword ptr [rbp - 408], rax + mov rax, qword ptr [rbp - 40] + movzx ecx, byte ptr [rbp - 
41] + add rax, rcx + mov qword ptr [rbp - 416], rax + mov rax, qword ptr [rbp - 32] + add rax, qword ptr [rbp - 416] + mov qword ptr [rbp - 424], rax + mov rax, qword ptr [rbp - 24] + add rax, qword ptr [rbp - 424] + mov qword ptr [rbp - 432], rax + mov rcx, qword ptr [rbp - 296] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax], rcx + mov rcx, qword ptr [rbp - 352] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 8], rcx + mov rcx, qword ptr [rbp - 408] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 16], rcx + mov rcx, qword ptr [rbp - 432] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 24], rcx + add rsp, 304 + pop rbp + ret \ No newline at end of file diff --git a/fiat-amd64/fiat_p224_from_bytes/gcc_14_1_0.asm b/fiat-amd64/fiat_p224_from_bytes/gcc_14_1_0.asm new file mode 100644 index 0000000000..89c5b59715 --- /dev/null +++ b/fiat-amd64/fiat_p224_from_bytes/gcc_14_1_0.asm @@ -0,0 +1,280 @@ + .globl _Z20fiat_p224_from_bytesPmPKh +_Z20fiat_p224_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 312 + mov QWORD PTR [rbp-424], rdi + mov QWORD PTR [rbp-432], rsi + mov rax, QWORD PTR [rbp-432] + add rax, 27 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-8], rax + mov rax, QWORD PTR [rbp-432] + add rax, 26 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-16], rax + mov rax, QWORD PTR [rbp-432] + add rax, 25 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-24], rax + mov rax, QWORD PTR [rbp-432] + add rax, 24 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-25], al + mov rax, QWORD PTR [rbp-432] + add rax, 23 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-40], rax + mov rax, QWORD PTR [rbp-432] + add rax, 22 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-48], rax + mov rax, QWORD PTR [rbp-432] + add rax, 21 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-56], rax 
+ mov rax, QWORD PTR [rbp-432] + add rax, 20 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-64], rax + mov rax, QWORD PTR [rbp-432] + add rax, 19 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-72], rax + mov rax, QWORD PTR [rbp-432] + add rax, 18 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-80], rax + mov rax, QWORD PTR [rbp-432] + add rax, 17 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-88], rax + mov rax, QWORD PTR [rbp-432] + add rax, 16 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-89], al + mov rax, QWORD PTR [rbp-432] + add rax, 15 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-104], rax + mov rax, QWORD PTR [rbp-432] + add rax, 14 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-112], rax + mov rax, QWORD PTR [rbp-432] + add rax, 13 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-120], rax + mov rax, QWORD PTR [rbp-432] + add rax, 12 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-128], rax + mov rax, QWORD PTR [rbp-432] + add rax, 11 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-136], rax + mov rax, QWORD PTR [rbp-432] + add rax, 10 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-144], rax + mov rax, QWORD PTR [rbp-432] + add rax, 9 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-152], rax + mov rax, QWORD PTR [rbp-432] + add rax, 8 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-153], al + mov rax, QWORD PTR [rbp-432] + add rax, 7 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-168], rax + mov rax, QWORD PTR [rbp-432] + add rax, 6 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-176], rax + mov rax, QWORD PTR [rbp-432] + add rax, 5 + movzx eax, BYTE 
PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-184], rax + mov rax, QWORD PTR [rbp-432] + add rax, 4 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-192], rax + mov rax, QWORD PTR [rbp-432] + add rax, 3 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-200], rax + mov rax, QWORD PTR [rbp-432] + add rax, 2 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-208], rax + mov rax, QWORD PTR [rbp-432] + add rax, 1 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-216], rax + mov rax, QWORD PTR [rbp-432] + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-217], al + movzx edx, BYTE PTR [rbp-217] + mov rax, QWORD PTR [rbp-216] + add rax, rdx + mov QWORD PTR [rbp-232], rax + mov rdx, QWORD PTR [rbp-208] + mov rax, QWORD PTR [rbp-232] + add rax, rdx + mov QWORD PTR [rbp-240], rax + mov rdx, QWORD PTR [rbp-200] + mov rax, QWORD PTR [rbp-240] + add rax, rdx + mov QWORD PTR [rbp-248], rax + mov rdx, QWORD PTR [rbp-192] + mov rax, QWORD PTR [rbp-248] + add rax, rdx + mov QWORD PTR [rbp-256], rax + mov rdx, QWORD PTR [rbp-184] + mov rax, QWORD PTR [rbp-256] + add rax, rdx + mov QWORD PTR [rbp-264], rax + mov rdx, QWORD PTR [rbp-176] + mov rax, QWORD PTR [rbp-264] + add rax, rdx + mov QWORD PTR [rbp-272], rax + mov rdx, QWORD PTR [rbp-168] + mov rax, QWORD PTR [rbp-272] + add rax, rdx + mov QWORD PTR [rbp-280], rax + movzx edx, BYTE PTR [rbp-153] + mov rax, QWORD PTR [rbp-152] + add rax, rdx + mov QWORD PTR [rbp-288], rax + mov rdx, QWORD PTR [rbp-144] + mov rax, QWORD PTR [rbp-288] + add rax, rdx + mov QWORD PTR [rbp-296], rax + mov rdx, QWORD PTR [rbp-136] + mov rax, QWORD PTR [rbp-296] + add rax, rdx + mov QWORD PTR [rbp-304], rax + mov rdx, QWORD PTR [rbp-128] + mov rax, QWORD PTR [rbp-304] + add rax, rdx + mov QWORD PTR [rbp-312], rax + mov rdx, QWORD PTR [rbp-120] + mov rax, QWORD PTR [rbp-312] + add rax, rdx + mov QWORD PTR [rbp-320], rax + mov rdx, 
QWORD PTR [rbp-112] + mov rax, QWORD PTR [rbp-320] + add rax, rdx + mov QWORD PTR [rbp-328], rax + mov rdx, QWORD PTR [rbp-104] + mov rax, QWORD PTR [rbp-328] + add rax, rdx + mov QWORD PTR [rbp-336], rax + movzx edx, BYTE PTR [rbp-89] + mov rax, QWORD PTR [rbp-88] + add rax, rdx + mov QWORD PTR [rbp-344], rax + mov rdx, QWORD PTR [rbp-80] + mov rax, QWORD PTR [rbp-344] + add rax, rdx + mov QWORD PTR [rbp-352], rax + mov rdx, QWORD PTR [rbp-72] + mov rax, QWORD PTR [rbp-352] + add rax, rdx + mov QWORD PTR [rbp-360], rax + mov rdx, QWORD PTR [rbp-64] + mov rax, QWORD PTR [rbp-360] + add rax, rdx + mov QWORD PTR [rbp-368], rax + mov rdx, QWORD PTR [rbp-56] + mov rax, QWORD PTR [rbp-368] + add rax, rdx + mov QWORD PTR [rbp-376], rax + mov rdx, QWORD PTR [rbp-48] + mov rax, QWORD PTR [rbp-376] + add rax, rdx + mov QWORD PTR [rbp-384], rax + mov rdx, QWORD PTR [rbp-40] + mov rax, QWORD PTR [rbp-384] + add rax, rdx + mov QWORD PTR [rbp-392], rax + movzx edx, BYTE PTR [rbp-25] + mov rax, QWORD PTR [rbp-24] + add rax, rdx + mov QWORD PTR [rbp-400], rax + mov rdx, QWORD PTR [rbp-16] + mov rax, QWORD PTR [rbp-400] + add rax, rdx + mov QWORD PTR [rbp-408], rax + mov rdx, QWORD PTR [rbp-8] + mov rax, QWORD PTR [rbp-408] + add rax, rdx + mov QWORD PTR [rbp-416], rax + mov rax, QWORD PTR [rbp-424] + mov rdx, QWORD PTR [rbp-280] + mov QWORD PTR [rax], rdx + mov rax, QWORD PTR [rbp-424] + lea rdx, [rax+8] + mov rax, QWORD PTR [rbp-336] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-424] + lea rdx, [rax+16] + mov rax, QWORD PTR [rbp-392] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-424] + lea rdx, [rax+24] + mov rax, QWORD PTR [rbp-416] + mov QWORD PTR [rdx], rax + nop + leave + ret \ No newline at end of file diff --git a/fiat-amd64/fiat_p256_from_bytes/clang_19_1_0_O0.asm b/fiat-amd64/fiat_p256_from_bytes/clang_19_1_0_O0.asm new file mode 100644 index 0000000000..76949fb3e3 --- /dev/null +++ b/fiat-amd64/fiat_p256_from_bytes/clang_19_1_0_O0.asm @@ -0,0 +1,234 @@ + 
.globl _Z20fiat_p256_from_bytesPmPKh +_Z20fiat_p256_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 368 + mov qword ptr [rbp - 8], rdi + mov qword ptr [rbp - 16], rsi + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 31] + shl rax, 56 + mov qword ptr [rbp - 24], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 30] + shl rax, 48 + mov qword ptr [rbp - 32], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 29] + shl rax, 40 + mov qword ptr [rbp - 40], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 28] + shl rax, 32 + mov qword ptr [rbp - 48], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 27] + shl rax, 24 + mov qword ptr [rbp - 56], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 26] + shl rax, 16 + mov qword ptr [rbp - 64], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 25] + shl rax, 8 + mov qword ptr [rbp - 72], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 24] + mov byte ptr [rbp - 73], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 23] + shl rax, 56 + mov qword ptr [rbp - 88], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 22] + shl rax, 48 + mov qword ptr [rbp - 96], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 21] + shl rax, 40 + mov qword ptr [rbp - 104], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 20] + shl rax, 32 + mov qword ptr [rbp - 112], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 19] + shl rax, 24 + mov qword ptr [rbp - 120], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 18] + shl rax, 16 + mov qword ptr [rbp - 128], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 17] + shl rax, 8 + mov qword ptr [rbp - 136], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 16] + mov byte ptr [rbp - 137], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 15] + shl rax, 56 + mov 
qword ptr [rbp - 152], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 14] + shl rax, 48 + mov qword ptr [rbp - 160], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 13] + shl rax, 40 + mov qword ptr [rbp - 168], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 12] + shl rax, 32 + mov qword ptr [rbp - 176], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 11] + shl rax, 24 + mov qword ptr [rbp - 184], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 10] + shl rax, 16 + mov qword ptr [rbp - 192], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 9] + shl rax, 8 + mov qword ptr [rbp - 200], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 8] + mov byte ptr [rbp - 201], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 7] + shl rax, 56 + mov qword ptr [rbp - 216], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 6] + shl rax, 48 + mov qword ptr [rbp - 224], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 5] + shl rax, 40 + mov qword ptr [rbp - 232], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 4] + shl rax, 32 + mov qword ptr [rbp - 240], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 3] + shl rax, 24 + mov qword ptr [rbp - 248], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 2] + shl rax, 16 + mov qword ptr [rbp - 256], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 1] + shl rax, 8 + mov qword ptr [rbp - 264], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax] + mov byte ptr [rbp - 265], al + mov rax, qword ptr [rbp - 264] + movzx ecx, byte ptr [rbp - 265] + add rax, rcx + mov qword ptr [rbp - 280], rax + mov rax, qword ptr [rbp - 256] + add rax, qword ptr [rbp - 280] + mov qword ptr [rbp - 288], rax + mov rax, qword ptr [rbp - 248] + add rax, qword ptr [rbp - 288] + mov qword ptr [rbp - 296], rax + mov rax, qword ptr [rbp - 240] 
+ add rax, qword ptr [rbp - 296] + mov qword ptr [rbp - 304], rax + mov rax, qword ptr [rbp - 232] + add rax, qword ptr [rbp - 304] + mov qword ptr [rbp - 312], rax + mov rax, qword ptr [rbp - 224] + add rax, qword ptr [rbp - 312] + mov qword ptr [rbp - 320], rax + mov rax, qword ptr [rbp - 216] + add rax, qword ptr [rbp - 320] + mov qword ptr [rbp - 328], rax + mov rax, qword ptr [rbp - 200] + movzx ecx, byte ptr [rbp - 201] + add rax, rcx + mov qword ptr [rbp - 336], rax + mov rax, qword ptr [rbp - 192] + add rax, qword ptr [rbp - 336] + mov qword ptr [rbp - 344], rax + mov rax, qword ptr [rbp - 184] + add rax, qword ptr [rbp - 344] + mov qword ptr [rbp - 352], rax + mov rax, qword ptr [rbp - 176] + add rax, qword ptr [rbp - 352] + mov qword ptr [rbp - 360], rax + mov rax, qword ptr [rbp - 168] + add rax, qword ptr [rbp - 360] + mov qword ptr [rbp - 368], rax + mov rax, qword ptr [rbp - 160] + add rax, qword ptr [rbp - 368] + mov qword ptr [rbp - 376], rax + mov rax, qword ptr [rbp - 152] + add rax, qword ptr [rbp - 376] + mov qword ptr [rbp - 384], rax + mov rax, qword ptr [rbp - 136] + movzx ecx, byte ptr [rbp - 137] + add rax, rcx + mov qword ptr [rbp - 392], rax + mov rax, qword ptr [rbp - 128] + add rax, qword ptr [rbp - 392] + mov qword ptr [rbp - 400], rax + mov rax, qword ptr [rbp - 120] + add rax, qword ptr [rbp - 400] + mov qword ptr [rbp - 408], rax + mov rax, qword ptr [rbp - 112] + add rax, qword ptr [rbp - 408] + mov qword ptr [rbp - 416], rax + mov rax, qword ptr [rbp - 104] + add rax, qword ptr [rbp - 416] + mov qword ptr [rbp - 424], rax + mov rax, qword ptr [rbp - 96] + add rax, qword ptr [rbp - 424] + mov qword ptr [rbp - 432], rax + mov rax, qword ptr [rbp - 88] + add rax, qword ptr [rbp - 432] + mov qword ptr [rbp - 440], rax + mov rax, qword ptr [rbp - 72] + movzx ecx, byte ptr [rbp - 73] + add rax, rcx + mov qword ptr [rbp - 448], rax + mov rax, qword ptr [rbp - 64] + add rax, qword ptr [rbp - 448] + mov qword ptr [rbp - 456], rax + mov 
rax, qword ptr [rbp - 56] + add rax, qword ptr [rbp - 456] + mov qword ptr [rbp - 464], rax + mov rax, qword ptr [rbp - 48] + add rax, qword ptr [rbp - 464] + mov qword ptr [rbp - 472], rax + mov rax, qword ptr [rbp - 40] + add rax, qword ptr [rbp - 472] + mov qword ptr [rbp - 480], rax + mov rax, qword ptr [rbp - 32] + add rax, qword ptr [rbp - 480] + mov qword ptr [rbp - 488], rax + mov rax, qword ptr [rbp - 24] + add rax, qword ptr [rbp - 488] + mov qword ptr [rbp - 496], rax + mov rcx, qword ptr [rbp - 328] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax], rcx + mov rcx, qword ptr [rbp - 384] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 8], rcx + mov rcx, qword ptr [rbp - 440] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 16], rcx + mov rcx, qword ptr [rbp - 496] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 24], rcx + add rsp, 368 + pop rbp + ret diff --git a/fiat-amd64/fiat_p256_from_bytes/gcc_14_1_0.asm b/fiat-amd64/fiat_p256_from_bytes/gcc_14_1_0.asm new file mode 100644 index 0000000000..95270a3616 --- /dev/null +++ b/fiat-amd64/fiat_p256_from_bytes/gcc_14_1_0.asm @@ -0,0 +1,320 @@ + .globl _Z20fiat_p256_from_bytesPmPKh +_Z20fiat_p256_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 376 + mov QWORD PTR [rbp-488], rdi + mov QWORD PTR [rbp-496], rsi + mov rax, QWORD PTR [rbp-496] + add rax, 31 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-8], rax + mov rax, QWORD PTR [rbp-496] + add rax, 30 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-16], rax + mov rax, QWORD PTR [rbp-496] + add rax, 29 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-24], rax + mov rax, QWORD PTR [rbp-496] + add rax, 28 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-32], rax + mov rax, QWORD PTR [rbp-496] + add rax, 27 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-40], rax + mov rax, QWORD PTR 
[rbp-496] + add rax, 26 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-48], rax + mov rax, QWORD PTR [rbp-496] + add rax, 25 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-56], rax + mov rax, QWORD PTR [rbp-496] + add rax, 24 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-57], al + mov rax, QWORD PTR [rbp-496] + add rax, 23 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-72], rax + mov rax, QWORD PTR [rbp-496] + add rax, 22 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-80], rax + mov rax, QWORD PTR [rbp-496] + add rax, 21 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-88], rax + mov rax, QWORD PTR [rbp-496] + add rax, 20 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-96], rax + mov rax, QWORD PTR [rbp-496] + add rax, 19 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-104], rax + mov rax, QWORD PTR [rbp-496] + add rax, 18 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-112], rax + mov rax, QWORD PTR [rbp-496] + add rax, 17 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-120], rax + mov rax, QWORD PTR [rbp-496] + add rax, 16 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-121], al + mov rax, QWORD PTR [rbp-496] + add rax, 15 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-136], rax + mov rax, QWORD PTR [rbp-496] + add rax, 14 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-144], rax + mov rax, QWORD PTR [rbp-496] + add rax, 13 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-152], rax + mov rax, QWORD PTR [rbp-496] + add rax, 12 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-160], rax + mov rax, QWORD PTR [rbp-496] + add rax, 11 + movzx eax, BYTE PTR [rax] + movzx 
eax, al + sal rax, 24 + mov QWORD PTR [rbp-168], rax + mov rax, QWORD PTR [rbp-496] + add rax, 10 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-176], rax + mov rax, QWORD PTR [rbp-496] + add rax, 9 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-184], rax + mov rax, QWORD PTR [rbp-496] + add rax, 8 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-185], al + mov rax, QWORD PTR [rbp-496] + add rax, 7 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-200], rax + mov rax, QWORD PTR [rbp-496] + add rax, 6 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-208], rax + mov rax, QWORD PTR [rbp-496] + add rax, 5 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-216], rax + mov rax, QWORD PTR [rbp-496] + add rax, 4 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-224], rax + mov rax, QWORD PTR [rbp-496] + add rax, 3 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-232], rax + mov rax, QWORD PTR [rbp-496] + add rax, 2 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-240], rax + mov rax, QWORD PTR [rbp-496] + add rax, 1 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-248], rax + mov rax, QWORD PTR [rbp-496] + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-249], al + movzx edx, BYTE PTR [rbp-249] + mov rax, QWORD PTR [rbp-248] + add rax, rdx + mov QWORD PTR [rbp-264], rax + mov rdx, QWORD PTR [rbp-240] + mov rax, QWORD PTR [rbp-264] + add rax, rdx + mov QWORD PTR [rbp-272], rax + mov rdx, QWORD PTR [rbp-232] + mov rax, QWORD PTR [rbp-272] + add rax, rdx + mov QWORD PTR [rbp-280], rax + mov rdx, QWORD PTR [rbp-224] + mov rax, QWORD PTR [rbp-280] + add rax, rdx + mov QWORD PTR [rbp-288], rax + mov rdx, QWORD PTR [rbp-216] + mov rax, QWORD PTR [rbp-288] + add rax, rdx + mov QWORD PTR [rbp-296], rax + mov rdx, QWORD PTR 
[rbp-208] + mov rax, QWORD PTR [rbp-296] + add rax, rdx + mov QWORD PTR [rbp-304], rax + mov rdx, QWORD PTR [rbp-200] + mov rax, QWORD PTR [rbp-304] + add rax, rdx + mov QWORD PTR [rbp-312], rax + movzx edx, BYTE PTR [rbp-185] + mov rax, QWORD PTR [rbp-184] + add rax, rdx + mov QWORD PTR [rbp-320], rax + mov rdx, QWORD PTR [rbp-176] + mov rax, QWORD PTR [rbp-320] + add rax, rdx + mov QWORD PTR [rbp-328], rax + mov rdx, QWORD PTR [rbp-168] + mov rax, QWORD PTR [rbp-328] + add rax, rdx + mov QWORD PTR [rbp-336], rax + mov rdx, QWORD PTR [rbp-160] + mov rax, QWORD PTR [rbp-336] + add rax, rdx + mov QWORD PTR [rbp-344], rax + mov rdx, QWORD PTR [rbp-152] + mov rax, QWORD PTR [rbp-344] + add rax, rdx + mov QWORD PTR [rbp-352], rax + mov rdx, QWORD PTR [rbp-144] + mov rax, QWORD PTR [rbp-352] + add rax, rdx + mov QWORD PTR [rbp-360], rax + mov rdx, QWORD PTR [rbp-136] + mov rax, QWORD PTR [rbp-360] + add rax, rdx + mov QWORD PTR [rbp-368], rax + movzx edx, BYTE PTR [rbp-121] + mov rax, QWORD PTR [rbp-120] + add rax, rdx + mov QWORD PTR [rbp-376], rax + mov rdx, QWORD PTR [rbp-112] + mov rax, QWORD PTR [rbp-376] + add rax, rdx + mov QWORD PTR [rbp-384], rax + mov rdx, QWORD PTR [rbp-104] + mov rax, QWORD PTR [rbp-384] + add rax, rdx + mov QWORD PTR [rbp-392], rax + mov rdx, QWORD PTR [rbp-96] + mov rax, QWORD PTR [rbp-392] + add rax, rdx + mov QWORD PTR [rbp-400], rax + mov rdx, QWORD PTR [rbp-88] + mov rax, QWORD PTR [rbp-400] + add rax, rdx + mov QWORD PTR [rbp-408], rax + mov rdx, QWORD PTR [rbp-80] + mov rax, QWORD PTR [rbp-408] + add rax, rdx + mov QWORD PTR [rbp-416], rax + mov rdx, QWORD PTR [rbp-72] + mov rax, QWORD PTR [rbp-416] + add rax, rdx + mov QWORD PTR [rbp-424], rax + movzx edx, BYTE PTR [rbp-57] + mov rax, QWORD PTR [rbp-56] + add rax, rdx + mov QWORD PTR [rbp-432], rax + mov rdx, QWORD PTR [rbp-48] + mov rax, QWORD PTR [rbp-432] + add rax, rdx + mov QWORD PTR [rbp-440], rax + mov rdx, QWORD PTR [rbp-40] + mov rax, QWORD PTR [rbp-440] + add rax, rdx + 
mov QWORD PTR [rbp-448], rax + mov rdx, QWORD PTR [rbp-32] + mov rax, QWORD PTR [rbp-448] + add rax, rdx + mov QWORD PTR [rbp-456], rax + mov rdx, QWORD PTR [rbp-24] + mov rax, QWORD PTR [rbp-456] + add rax, rdx + mov QWORD PTR [rbp-464], rax + mov rdx, QWORD PTR [rbp-16] + mov rax, QWORD PTR [rbp-464] + add rax, rdx + mov QWORD PTR [rbp-472], rax + mov rdx, QWORD PTR [rbp-8] + mov rax, QWORD PTR [rbp-472] + add rax, rdx + mov QWORD PTR [rbp-480], rax + mov rax, QWORD PTR [rbp-488] + mov rdx, QWORD PTR [rbp-312] + mov QWORD PTR [rax], rdx + mov rax, QWORD PTR [rbp-488] + lea rdx, [rax+8] + mov rax, QWORD PTR [rbp-368] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-488] + lea rdx, [rax+16] + mov rax, QWORD PTR [rbp-424] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-488] + lea rdx, [rax+24] + mov rax, QWORD PTR [rbp-480] + mov QWORD PTR [rdx], rax + nop + leave + ret diff --git a/fiat-amd64/fiat_p448_solinas_from_bytes/clang_19_1_0_O0.asm b/fiat-amd64/fiat_p448_solinas_from_bytes/clang_19_1_0_O0.asm new file mode 100644 index 0000000000..ce8b83828e --- /dev/null +++ b/fiat-amd64/fiat_p448_solinas_from_bytes/clang_19_1_0_O0.asm @@ -0,0 +1,402 @@ + .globl _Z20fiat_p448_solinas_from_bytesPmPKh +_Z20fiat_p448_solinas_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 720 + mov qword ptr [rbp - 8], rdi + mov qword ptr [rbp - 16], rsi + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 55] + shl rax, 48 + mov qword ptr [rbp - 24], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 54] + shl rax, 40 + mov qword ptr [rbp - 32], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 53] + shl rax, 32 + mov qword ptr [rbp - 40], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 52] + shl rax, 24 + mov qword ptr [rbp - 48], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 51] + shl rax, 16 + mov qword ptr [rbp - 56], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 50] + shl rax, 8 + 
mov qword ptr [rbp - 64], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 49] + mov byte ptr [rbp - 65], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 48] + shl rax, 48 + mov qword ptr [rbp - 80], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 47] + shl rax, 40 + mov qword ptr [rbp - 88], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 46] + shl rax, 32 + mov qword ptr [rbp - 96], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 45] + shl rax, 24 + mov qword ptr [rbp - 104], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 44] + shl rax, 16 + mov qword ptr [rbp - 112], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 43] + shl rax, 8 + mov qword ptr [rbp - 120], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 42] + mov byte ptr [rbp - 121], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 41] + shl rax, 48 + mov qword ptr [rbp - 136], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 40] + shl rax, 40 + mov qword ptr [rbp - 144], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 39] + shl rax, 32 + mov qword ptr [rbp - 152], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 38] + shl rax, 24 + mov qword ptr [rbp - 160], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 37] + shl rax, 16 + mov qword ptr [rbp - 168], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 36] + shl rax, 8 + mov qword ptr [rbp - 176], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 35] + mov byte ptr [rbp - 177], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 34] + shl rax, 48 + mov qword ptr [rbp - 192], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 33] + shl rax, 40 + mov qword ptr [rbp - 200], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 32] + shl rax, 32 + mov qword ptr [rbp - 208], rax + mov rax, qword 
ptr [rbp - 16] + movzx eax, byte ptr [rax + 31] + shl rax, 24 + mov qword ptr [rbp - 216], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 30] + shl rax, 16 + mov qword ptr [rbp - 224], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 29] + shl rax, 8 + mov qword ptr [rbp - 232], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 28] + mov byte ptr [rbp - 233], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 27] + shl rax, 48 + mov qword ptr [rbp - 248], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 26] + shl rax, 40 + mov qword ptr [rbp - 256], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 25] + shl rax, 32 + mov qword ptr [rbp - 264], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 24] + shl rax, 24 + mov qword ptr [rbp - 272], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 23] + shl rax, 16 + mov qword ptr [rbp - 280], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 22] + shl rax, 8 + mov qword ptr [rbp - 288], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 21] + mov byte ptr [rbp - 289], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 20] + shl rax, 48 + mov qword ptr [rbp - 304], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 19] + shl rax, 40 + mov qword ptr [rbp - 312], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 18] + shl rax, 32 + mov qword ptr [rbp - 320], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 17] + shl rax, 24 + mov qword ptr [rbp - 328], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 16] + shl rax, 16 + mov qword ptr [rbp - 336], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 15] + shl rax, 8 + mov qword ptr [rbp - 344], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 14] + mov byte ptr [rbp - 345], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 
13] + shl rax, 48 + mov qword ptr [rbp - 360], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 12] + shl rax, 40 + mov qword ptr [rbp - 368], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 11] + shl rax, 32 + mov qword ptr [rbp - 376], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 10] + shl rax, 24 + mov qword ptr [rbp - 384], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 9] + shl rax, 16 + mov qword ptr [rbp - 392], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 8] + shl rax, 8 + mov qword ptr [rbp - 400], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 7] + mov byte ptr [rbp - 401], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 6] + shl rax, 48 + mov qword ptr [rbp - 416], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 5] + shl rax, 40 + mov qword ptr [rbp - 424], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 4] + shl rax, 32 + mov qword ptr [rbp - 432], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 3] + shl rax, 24 + mov qword ptr [rbp - 440], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 2] + shl rax, 16 + mov qword ptr [rbp - 448], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 1] + shl rax, 8 + mov qword ptr [rbp - 456], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax] + mov byte ptr [rbp - 457], al + mov rax, qword ptr [rbp - 456] + movzx ecx, byte ptr [rbp - 457] + add rax, rcx + mov qword ptr [rbp - 472], rax + mov rax, qword ptr [rbp - 448] + add rax, qword ptr [rbp - 472] + mov qword ptr [rbp - 480], rax + mov rax, qword ptr [rbp - 440] + add rax, qword ptr [rbp - 480] + mov qword ptr [rbp - 488], rax + mov rax, qword ptr [rbp - 432] + add rax, qword ptr [rbp - 488] + mov qword ptr [rbp - 496], rax + mov rax, qword ptr [rbp - 424] + add rax, qword ptr [rbp - 496] + mov qword ptr [rbp - 504], rax + mov rax, qword ptr [rbp - 416] + 
add rax, qword ptr [rbp - 504] + mov qword ptr [rbp - 512], rax + mov rax, qword ptr [rbp - 400] + movzx ecx, byte ptr [rbp - 401] + add rax, rcx + mov qword ptr [rbp - 520], rax + mov rax, qword ptr [rbp - 392] + add rax, qword ptr [rbp - 520] + mov qword ptr [rbp - 528], rax + mov rax, qword ptr [rbp - 384] + add rax, qword ptr [rbp - 528] + mov qword ptr [rbp - 536], rax + mov rax, qword ptr [rbp - 376] + add rax, qword ptr [rbp - 536] + mov qword ptr [rbp - 544], rax + mov rax, qword ptr [rbp - 368] + add rax, qword ptr [rbp - 544] + mov qword ptr [rbp - 552], rax + mov rax, qword ptr [rbp - 360] + add rax, qword ptr [rbp - 552] + mov qword ptr [rbp - 560], rax + mov rax, qword ptr [rbp - 344] + movzx ecx, byte ptr [rbp - 345] + add rax, rcx + mov qword ptr [rbp - 568], rax + mov rax, qword ptr [rbp - 336] + add rax, qword ptr [rbp - 568] + mov qword ptr [rbp - 576], rax + mov rax, qword ptr [rbp - 328] + add rax, qword ptr [rbp - 576] + mov qword ptr [rbp - 584], rax + mov rax, qword ptr [rbp - 320] + add rax, qword ptr [rbp - 584] + mov qword ptr [rbp - 592], rax + mov rax, qword ptr [rbp - 312] + add rax, qword ptr [rbp - 592] + mov qword ptr [rbp - 600], rax + mov rax, qword ptr [rbp - 304] + add rax, qword ptr [rbp - 600] + mov qword ptr [rbp - 608], rax + mov rax, qword ptr [rbp - 288] + movzx ecx, byte ptr [rbp - 289] + add rax, rcx + mov qword ptr [rbp - 616], rax + mov rax, qword ptr [rbp - 280] + add rax, qword ptr [rbp - 616] + mov qword ptr [rbp - 624], rax + mov rax, qword ptr [rbp - 272] + add rax, qword ptr [rbp - 624] + mov qword ptr [rbp - 632], rax + mov rax, qword ptr [rbp - 264] + add rax, qword ptr [rbp - 632] + mov qword ptr [rbp - 640], rax + mov rax, qword ptr [rbp - 256] + add rax, qword ptr [rbp - 640] + mov qword ptr [rbp - 648], rax + mov rax, qword ptr [rbp - 248] + add rax, qword ptr [rbp - 648] + mov qword ptr [rbp - 656], rax + mov rax, qword ptr [rbp - 232] + movzx ecx, byte ptr [rbp - 233] + add rax, rcx + mov qword ptr [rbp - 
664], rax + mov rax, qword ptr [rbp - 224] + add rax, qword ptr [rbp - 664] + mov qword ptr [rbp - 672], rax + mov rax, qword ptr [rbp - 216] + add rax, qword ptr [rbp - 672] + mov qword ptr [rbp - 680], rax + mov rax, qword ptr [rbp - 208] + add rax, qword ptr [rbp - 680] + mov qword ptr [rbp - 688], rax + mov rax, qword ptr [rbp - 200] + add rax, qword ptr [rbp - 688] + mov qword ptr [rbp - 696], rax + mov rax, qword ptr [rbp - 192] + add rax, qword ptr [rbp - 696] + mov qword ptr [rbp - 704], rax + mov rax, qword ptr [rbp - 176] + movzx ecx, byte ptr [rbp - 177] + add rax, rcx + mov qword ptr [rbp - 712], rax + mov rax, qword ptr [rbp - 168] + add rax, qword ptr [rbp - 712] + mov qword ptr [rbp - 720], rax + mov rax, qword ptr [rbp - 160] + add rax, qword ptr [rbp - 720] + mov qword ptr [rbp - 728], rax + mov rax, qword ptr [rbp - 152] + add rax, qword ptr [rbp - 728] + mov qword ptr [rbp - 736], rax + mov rax, qword ptr [rbp - 144] + add rax, qword ptr [rbp - 736] + mov qword ptr [rbp - 744], rax + mov rax, qword ptr [rbp - 136] + add rax, qword ptr [rbp - 744] + mov qword ptr [rbp - 752], rax + mov rax, qword ptr [rbp - 120] + movzx ecx, byte ptr [rbp - 121] + add rax, rcx + mov qword ptr [rbp - 760], rax + mov rax, qword ptr [rbp - 112] + add rax, qword ptr [rbp - 760] + mov qword ptr [rbp - 768], rax + mov rax, qword ptr [rbp - 104] + add rax, qword ptr [rbp - 768] + mov qword ptr [rbp - 776], rax + mov rax, qword ptr [rbp - 96] + add rax, qword ptr [rbp - 776] + mov qword ptr [rbp - 784], rax + mov rax, qword ptr [rbp - 88] + add rax, qword ptr [rbp - 784] + mov qword ptr [rbp - 792], rax + mov rax, qword ptr [rbp - 80] + add rax, qword ptr [rbp - 792] + mov qword ptr [rbp - 800], rax + mov rax, qword ptr [rbp - 64] + movzx ecx, byte ptr [rbp - 65] + add rax, rcx + mov qword ptr [rbp - 808], rax + mov rax, qword ptr [rbp - 56] + add rax, qword ptr [rbp - 808] + mov qword ptr [rbp - 816], rax + mov rax, qword ptr [rbp - 48] + add rax, qword ptr [rbp - 816] + 
mov qword ptr [rbp - 824], rax + mov rax, qword ptr [rbp - 40] + add rax, qword ptr [rbp - 824] + mov qword ptr [rbp - 832], rax + mov rax, qword ptr [rbp - 32] + add rax, qword ptr [rbp - 832] + mov qword ptr [rbp - 840], rax + mov rax, qword ptr [rbp - 24] + add rax, qword ptr [rbp - 840] + mov qword ptr [rbp - 848], rax + mov rcx, qword ptr [rbp - 512] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax], rcx + mov rcx, qword ptr [rbp - 560] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 8], rcx + mov rcx, qword ptr [rbp - 608] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 16], rcx + mov rcx, qword ptr [rbp - 656] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 24], rcx + mov rcx, qword ptr [rbp - 704] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 32], rcx + mov rcx, qword ptr [rbp - 752] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 40], rcx + mov rcx, qword ptr [rbp - 800] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 48], rcx + mov rcx, qword ptr [rbp - 848] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 56], rcx + add rsp, 720 + pop rbp + ret diff --git a/fiat-amd64/fiat_p448_solinas_from_bytes/gcc_14_1_0.asm b/fiat-amd64/fiat_p448_solinas_from_bytes/gcc_14_1_0.asm new file mode 100644 index 0000000000..8a01dfa42c --- /dev/null +++ b/fiat-amd64/fiat_p448_solinas_from_bytes/gcc_14_1_0.asm @@ -0,0 +1,552 @@ + .globl _Z20fiat_p448_solinas_from_bytesPmPKh +_Z20fiat_p448_solinas_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 728 + mov QWORD PTR [rbp-840], rdi + mov QWORD PTR [rbp-848], rsi + mov rax, QWORD PTR [rbp-848] + add rax, 55 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-8], rax + mov rax, QWORD PTR [rbp-848] + add rax, 54 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-16], rax + mov rax, QWORD PTR [rbp-848] + add rax, 53 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-24], rax + mov rax, QWORD PTR 
[rbp-848] + add rax, 52 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-32], rax + mov rax, QWORD PTR [rbp-848] + add rax, 51 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-40], rax + mov rax, QWORD PTR [rbp-848] + add rax, 50 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-48], rax + mov rax, QWORD PTR [rbp-848] + add rax, 49 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-49], al + mov rax, QWORD PTR [rbp-848] + add rax, 48 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-64], rax + mov rax, QWORD PTR [rbp-848] + add rax, 47 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-72], rax + mov rax, QWORD PTR [rbp-848] + add rax, 46 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-80], rax + mov rax, QWORD PTR [rbp-848] + add rax, 45 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-88], rax + mov rax, QWORD PTR [rbp-848] + add rax, 44 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-96], rax + mov rax, QWORD PTR [rbp-848] + add rax, 43 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-104], rax + mov rax, QWORD PTR [rbp-848] + add rax, 42 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-105], al + mov rax, QWORD PTR [rbp-848] + add rax, 41 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-120], rax + mov rax, QWORD PTR [rbp-848] + add rax, 40 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-128], rax + mov rax, QWORD PTR [rbp-848] + add rax, 39 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-136], rax + mov rax, QWORD PTR [rbp-848] + add rax, 38 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-144], rax + mov rax, QWORD PTR [rbp-848] + add rax, 37 + movzx eax, BYTE PTR [rax] + movzx eax, 
al + sal rax, 16 + mov QWORD PTR [rbp-152], rax + mov rax, QWORD PTR [rbp-848] + add rax, 36 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-160], rax + mov rax, QWORD PTR [rbp-848] + add rax, 35 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-161], al + mov rax, QWORD PTR [rbp-848] + add rax, 34 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-176], rax + mov rax, QWORD PTR [rbp-848] + add rax, 33 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-184], rax + mov rax, QWORD PTR [rbp-848] + add rax, 32 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-192], rax + mov rax, QWORD PTR [rbp-848] + add rax, 31 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-200], rax + mov rax, QWORD PTR [rbp-848] + add rax, 30 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-208], rax + mov rax, QWORD PTR [rbp-848] + add rax, 29 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-216], rax + mov rax, QWORD PTR [rbp-848] + add rax, 28 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-217], al + mov rax, QWORD PTR [rbp-848] + add rax, 27 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-232], rax + mov rax, QWORD PTR [rbp-848] + add rax, 26 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-240], rax + mov rax, QWORD PTR [rbp-848] + add rax, 25 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-248], rax + mov rax, QWORD PTR [rbp-848] + add rax, 24 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-256], rax + mov rax, QWORD PTR [rbp-848] + add rax, 23 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-264], rax + mov rax, QWORD PTR [rbp-848] + add rax, 22 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-272], rax + mov 
rax, QWORD PTR [rbp-848] + add rax, 21 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-273], al + mov rax, QWORD PTR [rbp-848] + add rax, 20 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-288], rax + mov rax, QWORD PTR [rbp-848] + add rax, 19 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-296], rax + mov rax, QWORD PTR [rbp-848] + add rax, 18 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-304], rax + mov rax, QWORD PTR [rbp-848] + add rax, 17 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-312], rax + mov rax, QWORD PTR [rbp-848] + add rax, 16 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-320], rax + mov rax, QWORD PTR [rbp-848] + add rax, 15 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-328], rax + mov rax, QWORD PTR [rbp-848] + add rax, 14 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-329], al + mov rax, QWORD PTR [rbp-848] + add rax, 13 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-344], rax + mov rax, QWORD PTR [rbp-848] + add rax, 12 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-352], rax + mov rax, QWORD PTR [rbp-848] + add rax, 11 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-360], rax + mov rax, QWORD PTR [rbp-848] + add rax, 10 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-368], rax + mov rax, QWORD PTR [rbp-848] + add rax, 9 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-376], rax + mov rax, QWORD PTR [rbp-848] + add rax, 8 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-384], rax + mov rax, QWORD PTR [rbp-848] + add rax, 7 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-385], al + mov rax, QWORD PTR [rbp-848] + add rax, 6 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal 
rax, 48 + mov QWORD PTR [rbp-400], rax + mov rax, QWORD PTR [rbp-848] + add rax, 5 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-408], rax + mov rax, QWORD PTR [rbp-848] + add rax, 4 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-416], rax + mov rax, QWORD PTR [rbp-848] + add rax, 3 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-424], rax + mov rax, QWORD PTR [rbp-848] + add rax, 2 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-432], rax + mov rax, QWORD PTR [rbp-848] + add rax, 1 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-440], rax + mov rax, QWORD PTR [rbp-848] + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-441], al + movzx edx, BYTE PTR [rbp-441] + mov rax, QWORD PTR [rbp-440] + add rax, rdx + mov QWORD PTR [rbp-456], rax + mov rdx, QWORD PTR [rbp-432] + mov rax, QWORD PTR [rbp-456] + add rax, rdx + mov QWORD PTR [rbp-464], rax + mov rdx, QWORD PTR [rbp-424] + mov rax, QWORD PTR [rbp-464] + add rax, rdx + mov QWORD PTR [rbp-472], rax + mov rdx, QWORD PTR [rbp-416] + mov rax, QWORD PTR [rbp-472] + add rax, rdx + mov QWORD PTR [rbp-480], rax + mov rdx, QWORD PTR [rbp-408] + mov rax, QWORD PTR [rbp-480] + add rax, rdx + mov QWORD PTR [rbp-488], rax + mov rdx, QWORD PTR [rbp-400] + mov rax, QWORD PTR [rbp-488] + add rax, rdx + mov QWORD PTR [rbp-496], rax + movzx edx, BYTE PTR [rbp-385] + mov rax, QWORD PTR [rbp-384] + add rax, rdx + mov QWORD PTR [rbp-504], rax + mov rdx, QWORD PTR [rbp-376] + mov rax, QWORD PTR [rbp-504] + add rax, rdx + mov QWORD PTR [rbp-512], rax + mov rdx, QWORD PTR [rbp-368] + mov rax, QWORD PTR [rbp-512] + add rax, rdx + mov QWORD PTR [rbp-520], rax + mov rdx, QWORD PTR [rbp-360] + mov rax, QWORD PTR [rbp-520] + add rax, rdx + mov QWORD PTR [rbp-528], rax + mov rdx, QWORD PTR [rbp-352] + mov rax, QWORD PTR [rbp-528] + add rax, rdx + mov QWORD PTR [rbp-536], rax + mov rdx, QWORD 
PTR [rbp-344] + mov rax, QWORD PTR [rbp-536] + add rax, rdx + mov QWORD PTR [rbp-544], rax + movzx edx, BYTE PTR [rbp-329] + mov rax, QWORD PTR [rbp-328] + add rax, rdx + mov QWORD PTR [rbp-552], rax + mov rdx, QWORD PTR [rbp-320] + mov rax, QWORD PTR [rbp-552] + add rax, rdx + mov QWORD PTR [rbp-560], rax + mov rdx, QWORD PTR [rbp-312] + mov rax, QWORD PTR [rbp-560] + add rax, rdx + mov QWORD PTR [rbp-568], rax + mov rdx, QWORD PTR [rbp-304] + mov rax, QWORD PTR [rbp-568] + add rax, rdx + mov QWORD PTR [rbp-576], rax + mov rdx, QWORD PTR [rbp-296] + mov rax, QWORD PTR [rbp-576] + add rax, rdx + mov QWORD PTR [rbp-584], rax + mov rdx, QWORD PTR [rbp-288] + mov rax, QWORD PTR [rbp-584] + add rax, rdx + mov QWORD PTR [rbp-592], rax + movzx edx, BYTE PTR [rbp-273] + mov rax, QWORD PTR [rbp-272] + add rax, rdx + mov QWORD PTR [rbp-600], rax + mov rdx, QWORD PTR [rbp-264] + mov rax, QWORD PTR [rbp-600] + add rax, rdx + mov QWORD PTR [rbp-608], rax + mov rdx, QWORD PTR [rbp-256] + mov rax, QWORD PTR [rbp-608] + add rax, rdx + mov QWORD PTR [rbp-616], rax + mov rdx, QWORD PTR [rbp-248] + mov rax, QWORD PTR [rbp-616] + add rax, rdx + mov QWORD PTR [rbp-624], rax + mov rdx, QWORD PTR [rbp-240] + mov rax, QWORD PTR [rbp-624] + add rax, rdx + mov QWORD PTR [rbp-632], rax + mov rdx, QWORD PTR [rbp-232] + mov rax, QWORD PTR [rbp-632] + add rax, rdx + mov QWORD PTR [rbp-640], rax + movzx edx, BYTE PTR [rbp-217] + mov rax, QWORD PTR [rbp-216] + add rax, rdx + mov QWORD PTR [rbp-648], rax + mov rdx, QWORD PTR [rbp-208] + mov rax, QWORD PTR [rbp-648] + add rax, rdx + mov QWORD PTR [rbp-656], rax + mov rdx, QWORD PTR [rbp-200] + mov rax, QWORD PTR [rbp-656] + add rax, rdx + mov QWORD PTR [rbp-664], rax + mov rdx, QWORD PTR [rbp-192] + mov rax, QWORD PTR [rbp-664] + add rax, rdx + mov QWORD PTR [rbp-672], rax + mov rdx, QWORD PTR [rbp-184] + mov rax, QWORD PTR [rbp-672] + add rax, rdx + mov QWORD PTR [rbp-680], rax + mov rdx, QWORD PTR [rbp-176] + mov rax, QWORD PTR [rbp-680] + add 
rax, rdx + mov QWORD PTR [rbp-688], rax + movzx edx, BYTE PTR [rbp-161] + mov rax, QWORD PTR [rbp-160] + add rax, rdx + mov QWORD PTR [rbp-696], rax + mov rdx, QWORD PTR [rbp-152] + mov rax, QWORD PTR [rbp-696] + add rax, rdx + mov QWORD PTR [rbp-704], rax + mov rdx, QWORD PTR [rbp-144] + mov rax, QWORD PTR [rbp-704] + add rax, rdx + mov QWORD PTR [rbp-712], rax + mov rdx, QWORD PTR [rbp-136] + mov rax, QWORD PTR [rbp-712] + add rax, rdx + mov QWORD PTR [rbp-720], rax + mov rdx, QWORD PTR [rbp-128] + mov rax, QWORD PTR [rbp-720] + add rax, rdx + mov QWORD PTR [rbp-728], rax + mov rdx, QWORD PTR [rbp-120] + mov rax, QWORD PTR [rbp-728] + add rax, rdx + mov QWORD PTR [rbp-736], rax + movzx edx, BYTE PTR [rbp-105] + mov rax, QWORD PTR [rbp-104] + add rax, rdx + mov QWORD PTR [rbp-744], rax + mov rdx, QWORD PTR [rbp-96] + mov rax, QWORD PTR [rbp-744] + add rax, rdx + mov QWORD PTR [rbp-752], rax + mov rdx, QWORD PTR [rbp-88] + mov rax, QWORD PTR [rbp-752] + add rax, rdx + mov QWORD PTR [rbp-760], rax + mov rdx, QWORD PTR [rbp-80] + mov rax, QWORD PTR [rbp-760] + add rax, rdx + mov QWORD PTR [rbp-768], rax + mov rdx, QWORD PTR [rbp-72] + mov rax, QWORD PTR [rbp-768] + add rax, rdx + mov QWORD PTR [rbp-776], rax + mov rdx, QWORD PTR [rbp-64] + mov rax, QWORD PTR [rbp-776] + add rax, rdx + mov QWORD PTR [rbp-784], rax + movzx edx, BYTE PTR [rbp-49] + mov rax, QWORD PTR [rbp-48] + add rax, rdx + mov QWORD PTR [rbp-792], rax + mov rdx, QWORD PTR [rbp-40] + mov rax, QWORD PTR [rbp-792] + add rax, rdx + mov QWORD PTR [rbp-800], rax + mov rdx, QWORD PTR [rbp-32] + mov rax, QWORD PTR [rbp-800] + add rax, rdx + mov QWORD PTR [rbp-808], rax + mov rdx, QWORD PTR [rbp-24] + mov rax, QWORD PTR [rbp-808] + add rax, rdx + mov QWORD PTR [rbp-816], rax + mov rdx, QWORD PTR [rbp-16] + mov rax, QWORD PTR [rbp-816] + add rax, rdx + mov QWORD PTR [rbp-824], rax + mov rdx, QWORD PTR [rbp-8] + mov rax, QWORD PTR [rbp-824] + add rax, rdx + mov QWORD PTR [rbp-832], rax + mov rax, QWORD PTR 
[rbp-840] + mov rdx, QWORD PTR [rbp-496] + mov QWORD PTR [rax], rdx + mov rax, QWORD PTR [rbp-840] + lea rdx, [rax+8] + mov rax, QWORD PTR [rbp-544] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-840] + lea rdx, [rax+16] + mov rax, QWORD PTR [rbp-592] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-840] + lea rdx, [rax+24] + mov rax, QWORD PTR [rbp-640] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-840] + lea rdx, [rax+32] + mov rax, QWORD PTR [rbp-688] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-840] + lea rdx, [rax+40] + mov rax, QWORD PTR [rbp-736] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-840] + lea rdx, [rax+48] + mov rax, QWORD PTR [rbp-784] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-840] + lea rdx, [rax+56] + mov rax, QWORD PTR [rbp-832] + mov QWORD PTR [rdx], rax + nop + leave + ret diff --git a/fiat-amd64/fiat_p521_from_bytes/clang_19_1_0_O0.asm b/fiat-amd64/fiat_p521_from_bytes/clang_19_1_0_O0.asm new file mode 100644 index 0000000000..9a001eb1ed --- /dev/null +++ b/fiat-amd64/fiat_p521_from_bytes/clang_19_1_0_O0.asm @@ -0,0 +1,532 @@ + .globl _Z20fiat_p521_from_bytesPmPKh +_Z20fiat_p521_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 1016 + mov qword ptr [rbp - 8], rdi + mov qword ptr [rbp - 16], rsi + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 65] + shl rax, 56 + mov qword ptr [rbp - 24], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 64] + shl rax, 48 + mov qword ptr [rbp - 32], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 63] + shl rax, 40 + mov qword ptr [rbp - 40], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 62] + shl rax, 32 + mov qword ptr [rbp - 48], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 61] + shl rax, 24 + mov qword ptr [rbp - 56], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 60] + shl rax, 16 + mov qword ptr [rbp - 64], rax + mov rax, qword ptr [rbp - 16] + movzx eax, 
byte ptr [rax + 59] + shl rax, 8 + mov qword ptr [rbp - 72], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 58] + mov byte ptr [rbp - 73], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 57] + shl rax, 50 + mov qword ptr [rbp - 88], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 56] + shl rax, 42 + mov qword ptr [rbp - 96], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 55] + shl rax, 34 + mov qword ptr [rbp - 104], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 54] + shl rax, 26 + mov qword ptr [rbp - 112], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 53] + shl rax, 18 + mov qword ptr [rbp - 120], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 52] + shl rax, 10 + mov qword ptr [rbp - 128], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 51] + shl rax, 2 + mov qword ptr [rbp - 136], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 50] + shl rax, 52 + mov qword ptr [rbp - 144], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 49] + shl rax, 44 + mov qword ptr [rbp - 152], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 48] + shl rax, 36 + mov qword ptr [rbp - 160], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 47] + shl rax, 28 + mov qword ptr [rbp - 168], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 46] + shl rax, 20 + mov qword ptr [rbp - 176], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 45] + shl rax, 12 + mov qword ptr [rbp - 184], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 44] + shl rax, 4 + mov qword ptr [rbp - 192], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 43] + shl rax, 54 + mov qword ptr [rbp - 200], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 42] + shl rax, 46 + mov qword ptr [rbp - 208], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr 
[rax + 41] + shl rax, 38 + mov qword ptr [rbp - 216], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 40] + shl rax, 30 + mov qword ptr [rbp - 224], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 39] + shl rax, 22 + mov qword ptr [rbp - 232], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 38] + shl rax, 14 + mov qword ptr [rbp - 240], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 37] + shl rax, 6 + mov qword ptr [rbp - 248], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 36] + shl rax, 56 + mov qword ptr [rbp - 256], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 35] + shl rax, 48 + mov qword ptr [rbp - 264], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 34] + shl rax, 40 + mov qword ptr [rbp - 272], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 33] + shl rax, 32 + mov qword ptr [rbp - 280], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 32] + shl rax, 24 + mov qword ptr [rbp - 288], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 31] + shl rax, 16 + mov qword ptr [rbp - 296], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 30] + shl rax, 8 + mov qword ptr [rbp - 304], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax + 29] + mov byte ptr [rbp - 305], al + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 28] + shl rax, 50 + mov qword ptr [rbp - 320], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 27] + shl rax, 42 + mov qword ptr [rbp - 328], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 26] + shl rax, 34 + mov qword ptr [rbp - 336], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 25] + shl rax, 26 + mov qword ptr [rbp - 344], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 24] + shl rax, 18 + mov qword ptr [rbp - 352], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax 
+ 23] + shl rax, 10 + mov qword ptr [rbp - 360], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 22] + shl rax, 2 + mov qword ptr [rbp - 368], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 21] + shl rax, 52 + mov qword ptr [rbp - 376], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 20] + shl rax, 44 + mov qword ptr [rbp - 384], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 19] + shl rax, 36 + mov qword ptr [rbp - 392], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 18] + shl rax, 28 + mov qword ptr [rbp - 400], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 17] + shl rax, 20 + mov qword ptr [rbp - 408], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 16] + shl rax, 12 + mov qword ptr [rbp - 416], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 15] + shl rax, 4 + mov qword ptr [rbp - 424], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 14] + shl rax, 54 + mov qword ptr [rbp - 432], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 13] + shl rax, 46 + mov qword ptr [rbp - 440], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 12] + shl rax, 38 + mov qword ptr [rbp - 448], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 11] + shl rax, 30 + mov qword ptr [rbp - 456], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 10] + shl rax, 22 + mov qword ptr [rbp - 464], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 9] + shl rax, 14 + mov qword ptr [rbp - 472], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 8] + shl rax, 6 + mov qword ptr [rbp - 480], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 7] + shl rax, 56 + mov qword ptr [rbp - 488], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 6] + shl rax, 48 + mov qword ptr [rbp - 496], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte 
ptr [rax + 5] + shl rax, 40 + mov qword ptr [rbp - 504], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 4] + shl rax, 32 + mov qword ptr [rbp - 512], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 3] + shl rax, 24 + mov qword ptr [rbp - 520], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 2] + shl rax, 16 + mov qword ptr [rbp - 528], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 1] + shl rax, 8 + mov qword ptr [rbp - 536], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax] + mov byte ptr [rbp - 537], al + mov rax, qword ptr [rbp - 536] + movzx ecx, byte ptr [rbp - 537] + add rax, rcx + mov qword ptr [rbp - 552], rax + mov rax, qword ptr [rbp - 528] + add rax, qword ptr [rbp - 552] + mov qword ptr [rbp - 560], rax + mov rax, qword ptr [rbp - 520] + add rax, qword ptr [rbp - 560] + mov qword ptr [rbp - 568], rax + mov rax, qword ptr [rbp - 512] + add rax, qword ptr [rbp - 568] + mov qword ptr [rbp - 576], rax + mov rax, qword ptr [rbp - 504] + add rax, qword ptr [rbp - 576] + mov qword ptr [rbp - 584], rax + mov rax, qword ptr [rbp - 496] + add rax, qword ptr [rbp - 584] + mov qword ptr [rbp - 592], rax + mov rax, qword ptr [rbp - 488] + add rax, qword ptr [rbp - 592] + mov qword ptr [rbp - 600], rax + movabs rax, 288230376151711743 + and rax, qword ptr [rbp - 600] + mov qword ptr [rbp - 608], rax + mov rax, qword ptr [rbp - 600] + shr rax, 58 + mov byte ptr [rbp - 609], al + mov rax, qword ptr [rbp - 480] + movzx ecx, byte ptr [rbp - 609] + add rax, rcx + mov qword ptr [rbp - 624], rax + mov rax, qword ptr [rbp - 472] + add rax, qword ptr [rbp - 624] + mov qword ptr [rbp - 632], rax + mov rax, qword ptr [rbp - 464] + add rax, qword ptr [rbp - 632] + mov qword ptr [rbp - 640], rax + mov rax, qword ptr [rbp - 456] + add rax, qword ptr [rbp - 640] + mov qword ptr [rbp - 648], rax + mov rax, qword ptr [rbp - 448] + add rax, qword ptr [rbp - 648] + mov qword ptr [rbp - 656], rax + mov rax, 
qword ptr [rbp - 440] + add rax, qword ptr [rbp - 656] + mov qword ptr [rbp - 664], rax + mov rax, qword ptr [rbp - 432] + add rax, qword ptr [rbp - 664] + mov qword ptr [rbp - 672], rax + movabs rax, 288230376151711743 + and rax, qword ptr [rbp - 672] + mov qword ptr [rbp - 680], rax + mov rax, qword ptr [rbp - 672] + shr rax, 58 + mov byte ptr [rbp - 681], al + mov rax, qword ptr [rbp - 424] + movzx ecx, byte ptr [rbp - 681] + add rax, rcx + mov qword ptr [rbp - 696], rax + mov rax, qword ptr [rbp - 416] + add rax, qword ptr [rbp - 696] + mov qword ptr [rbp - 704], rax + mov rax, qword ptr [rbp - 408] + add rax, qword ptr [rbp - 704] + mov qword ptr [rbp - 712], rax + mov rax, qword ptr [rbp - 400] + add rax, qword ptr [rbp - 712] + mov qword ptr [rbp - 720], rax + mov rax, qword ptr [rbp - 392] + add rax, qword ptr [rbp - 720] + mov qword ptr [rbp - 728], rax + mov rax, qword ptr [rbp - 384] + add rax, qword ptr [rbp - 728] + mov qword ptr [rbp - 736], rax + mov rax, qword ptr [rbp - 376] + add rax, qword ptr [rbp - 736] + mov qword ptr [rbp - 744], rax + movabs rax, 288230376151711743 + and rax, qword ptr [rbp - 744] + mov qword ptr [rbp - 752], rax + mov rax, qword ptr [rbp - 744] + shr rax, 58 + mov byte ptr [rbp - 753], al + mov rax, qword ptr [rbp - 368] + movzx ecx, byte ptr [rbp - 753] + add rax, rcx + mov qword ptr [rbp - 768], rax + mov rax, qword ptr [rbp - 360] + add rax, qword ptr [rbp - 768] + mov qword ptr [rbp - 776], rax + mov rax, qword ptr [rbp - 352] + add rax, qword ptr [rbp - 776] + mov qword ptr [rbp - 784], rax + mov rax, qword ptr [rbp - 344] + add rax, qword ptr [rbp - 784] + mov qword ptr [rbp - 792], rax + mov rax, qword ptr [rbp - 336] + add rax, qword ptr [rbp - 792] + mov qword ptr [rbp - 800], rax + mov rax, qword ptr [rbp - 328] + add rax, qword ptr [rbp - 800] + mov qword ptr [rbp - 808], rax + mov rax, qword ptr [rbp - 320] + add rax, qword ptr [rbp - 808] + mov qword ptr [rbp - 816], rax + mov rax, qword ptr [rbp - 304] + movzx 
ecx, byte ptr [rbp - 305] + add rax, rcx + mov qword ptr [rbp - 824], rax + mov rax, qword ptr [rbp - 296] + add rax, qword ptr [rbp - 824] + mov qword ptr [rbp - 832], rax + mov rax, qword ptr [rbp - 288] + add rax, qword ptr [rbp - 832] + mov qword ptr [rbp - 840], rax + mov rax, qword ptr [rbp - 280] + add rax, qword ptr [rbp - 840] + mov qword ptr [rbp - 848], rax + mov rax, qword ptr [rbp - 272] + add rax, qword ptr [rbp - 848] + mov qword ptr [rbp - 856], rax + mov rax, qword ptr [rbp - 264] + add rax, qword ptr [rbp - 856] + mov qword ptr [rbp - 864], rax + mov rax, qword ptr [rbp - 256] + add rax, qword ptr [rbp - 864] + mov qword ptr [rbp - 872], rax + movabs rax, 288230376151711743 + and rax, qword ptr [rbp - 872] + mov qword ptr [rbp - 880], rax + mov rax, qword ptr [rbp - 872] + shr rax, 58 + mov byte ptr [rbp - 881], al + mov rax, qword ptr [rbp - 248] + movzx ecx, byte ptr [rbp - 881] + add rax, rcx + mov qword ptr [rbp - 896], rax + mov rax, qword ptr [rbp - 240] + add rax, qword ptr [rbp - 896] + mov qword ptr [rbp - 904], rax + mov rax, qword ptr [rbp - 232] + add rax, qword ptr [rbp - 904] + mov qword ptr [rbp - 912], rax + mov rax, qword ptr [rbp - 224] + add rax, qword ptr [rbp - 912] + mov qword ptr [rbp - 920], rax + mov rax, qword ptr [rbp - 216] + add rax, qword ptr [rbp - 920] + mov qword ptr [rbp - 928], rax + mov rax, qword ptr [rbp - 208] + add rax, qword ptr [rbp - 928] + mov qword ptr [rbp - 936], rax + mov rax, qword ptr [rbp - 200] + add rax, qword ptr [rbp - 936] + mov qword ptr [rbp - 944], rax + movabs rax, 288230376151711743 + and rax, qword ptr [rbp - 944] + mov qword ptr [rbp - 952], rax + mov rax, qword ptr [rbp - 944] + shr rax, 58 + mov byte ptr [rbp - 953], al + mov rax, qword ptr [rbp - 192] + movzx ecx, byte ptr [rbp - 953] + add rax, rcx + mov qword ptr [rbp - 968], rax + mov rax, qword ptr [rbp - 184] + add rax, qword ptr [rbp - 968] + mov qword ptr [rbp - 976], rax + mov rax, qword ptr [rbp - 176] + add rax, qword ptr 
[rbp - 976] + mov qword ptr [rbp - 984], rax + mov rax, qword ptr [rbp - 168] + add rax, qword ptr [rbp - 984] + mov qword ptr [rbp - 992], rax + mov rax, qword ptr [rbp - 160] + add rax, qword ptr [rbp - 992] + mov qword ptr [rbp - 1000], rax + mov rax, qword ptr [rbp - 152] + add rax, qword ptr [rbp - 1000] + mov qword ptr [rbp - 1008], rax + mov rax, qword ptr [rbp - 144] + add rax, qword ptr [rbp - 1008] + mov qword ptr [rbp - 1016], rax + movabs rax, 288230376151711743 + and rax, qword ptr [rbp - 1016] + mov qword ptr [rbp - 1024], rax + mov rax, qword ptr [rbp - 1016] + shr rax, 58 + mov byte ptr [rbp - 1025], al + mov rax, qword ptr [rbp - 136] + movzx ecx, byte ptr [rbp - 1025] + add rax, rcx + mov qword ptr [rbp - 1040], rax + mov rax, qword ptr [rbp - 128] + add rax, qword ptr [rbp - 1040] + mov qword ptr [rbp - 1048], rax + mov rax, qword ptr [rbp - 120] + add rax, qword ptr [rbp - 1048] + mov qword ptr [rbp - 1056], rax + mov rax, qword ptr [rbp - 112] + add rax, qword ptr [rbp - 1056] + mov qword ptr [rbp - 1064], rax + mov rax, qword ptr [rbp - 104] + add rax, qword ptr [rbp - 1064] + mov qword ptr [rbp - 1072], rax + mov rax, qword ptr [rbp - 96] + add rax, qword ptr [rbp - 1072] + mov qword ptr [rbp - 1080], rax + mov rax, qword ptr [rbp - 88] + add rax, qword ptr [rbp - 1080] + mov qword ptr [rbp - 1088], rax + mov rax, qword ptr [rbp - 72] + movzx ecx, byte ptr [rbp - 73] + add rax, rcx + mov qword ptr [rbp - 1096], rax + mov rax, qword ptr [rbp - 64] + add rax, qword ptr [rbp - 1096] + mov qword ptr [rbp - 1104], rax + mov rax, qword ptr [rbp - 56] + add rax, qword ptr [rbp - 1104] + mov qword ptr [rbp - 1112], rax + mov rax, qword ptr [rbp - 48] + add rax, qword ptr [rbp - 1112] + mov qword ptr [rbp - 1120], rax + mov rax, qword ptr [rbp - 40] + add rax, qword ptr [rbp - 1120] + mov qword ptr [rbp - 1128], rax + mov rax, qword ptr [rbp - 32] + add rax, qword ptr [rbp - 1128] + mov qword ptr [rbp - 1136], rax + mov rax, qword ptr [rbp - 24] + add 
rax, qword ptr [rbp - 1136] + mov qword ptr [rbp - 1144], rax + mov rcx, qword ptr [rbp - 608] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax], rcx + mov rcx, qword ptr [rbp - 680] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 8], rcx + mov rcx, qword ptr [rbp - 752] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 16], rcx + mov rcx, qword ptr [rbp - 816] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 24], rcx + mov rcx, qword ptr [rbp - 880] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 32], rcx + mov rcx, qword ptr [rbp - 952] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 40], rcx + mov rcx, qword ptr [rbp - 1024] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 48], rcx + mov rcx, qword ptr [rbp - 1088] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 56], rcx + mov rcx, qword ptr [rbp - 1144] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 64], rcx + add rsp, 1016 + pop rbp + ret diff --git a/fiat-amd64/fiat_p521_from_bytes/gcc_14_1_0.asm b/fiat-amd64/fiat_p521_from_bytes/gcc_14_1_0.asm new file mode 100644 index 0000000000..c33a80aae6 --- /dev/null +++ b/fiat-amd64/fiat_p521_from_bytes/gcc_14_1_0.asm @@ -0,0 +1,722 @@ + .globl _Z20fiat_p521_from_bytesPmPKh +_Z20fiat_p521_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 1032 + mov QWORD PTR [rbp-1144], rdi + mov QWORD PTR [rbp-1152], rsi + mov rax, QWORD PTR [rbp-1152] + add rax, 65 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-8], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 64 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-16], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 63 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-24], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 62 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-32], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 61 + movzx eax, BYTE PTR [rax] + movzx eax, 
al + sal rax, 24 + mov QWORD PTR [rbp-40], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 60 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-48], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 59 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-56], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 58 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-57], al + mov rax, QWORD PTR [rbp-1152] + add rax, 57 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 50 + mov QWORD PTR [rbp-72], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 56 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 42 + mov QWORD PTR [rbp-80], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 55 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 34 + mov QWORD PTR [rbp-88], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 54 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 26 + mov QWORD PTR [rbp-96], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 53 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 18 + mov QWORD PTR [rbp-104], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 52 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 10 + mov QWORD PTR [rbp-112], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 51 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 2 + mov QWORD PTR [rbp-120], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 50 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 52 + mov QWORD PTR [rbp-128], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 49 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 44 + mov QWORD PTR [rbp-136], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 48 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 36 + mov QWORD PTR [rbp-144], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 47 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 28 + mov QWORD PTR [rbp-152], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 46 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 20 
+ mov QWORD PTR [rbp-160], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 45 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 12 + mov QWORD PTR [rbp-168], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 44 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 4 + mov QWORD PTR [rbp-176], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 43 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 54 + mov QWORD PTR [rbp-184], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 42 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 46 + mov QWORD PTR [rbp-192], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 41 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 38 + mov QWORD PTR [rbp-200], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 40 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 30 + mov QWORD PTR [rbp-208], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 39 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 22 + mov QWORD PTR [rbp-216], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 38 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 14 + mov QWORD PTR [rbp-224], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 37 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 6 + mov QWORD PTR [rbp-232], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 36 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-240], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 35 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-248], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 34 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-256], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 33 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-264], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 32 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-272], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 31 + movzx eax, BYTE PTR [rax] + 
movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-280], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 30 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-288], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 29 + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-289], al + mov rax, QWORD PTR [rbp-1152] + add rax, 28 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 50 + mov QWORD PTR [rbp-304], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 27 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 42 + mov QWORD PTR [rbp-312], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 26 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 34 + mov QWORD PTR [rbp-320], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 25 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 26 + mov QWORD PTR [rbp-328], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 24 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 18 + mov QWORD PTR [rbp-336], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 23 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 10 + mov QWORD PTR [rbp-344], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 22 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 2 + mov QWORD PTR [rbp-352], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 21 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 52 + mov QWORD PTR [rbp-360], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 20 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 44 + mov QWORD PTR [rbp-368], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 19 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 36 + mov QWORD PTR [rbp-376], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 18 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 28 + mov QWORD PTR [rbp-384], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 17 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 20 + mov QWORD PTR [rbp-392], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 16 + movzx eax, BYTE PTR [rax] + movzx 
eax, al + sal rax, 12 + mov QWORD PTR [rbp-400], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 15 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 4 + mov QWORD PTR [rbp-408], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 14 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 54 + mov QWORD PTR [rbp-416], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 13 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 46 + mov QWORD PTR [rbp-424], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 12 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 38 + mov QWORD PTR [rbp-432], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 11 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 30 + mov QWORD PTR [rbp-440], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 10 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 22 + mov QWORD PTR [rbp-448], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 9 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 14 + mov QWORD PTR [rbp-456], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 8 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 6 + mov QWORD PTR [rbp-464], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 7 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 56 + mov QWORD PTR [rbp-472], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 6 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 48 + mov QWORD PTR [rbp-480], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 5 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-488], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 4 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-496], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 3 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-504], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 2 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 16 + mov QWORD PTR [rbp-512], rax + mov rax, QWORD PTR [rbp-1152] + add rax, 1 + movzx eax, BYTE PTR 
[rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-520], rax + mov rax, QWORD PTR [rbp-1152] + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-521], al + movzx edx, BYTE PTR [rbp-521] + mov rax, QWORD PTR [rbp-520] + add rax, rdx + mov QWORD PTR [rbp-536], rax + mov rdx, QWORD PTR [rbp-512] + mov rax, QWORD PTR [rbp-536] + add rax, rdx + mov QWORD PTR [rbp-544], rax + mov rdx, QWORD PTR [rbp-504] + mov rax, QWORD PTR [rbp-544] + add rax, rdx + mov QWORD PTR [rbp-552], rax + mov rdx, QWORD PTR [rbp-496] + mov rax, QWORD PTR [rbp-552] + add rax, rdx + mov QWORD PTR [rbp-560], rax + mov rdx, QWORD PTR [rbp-488] + mov rax, QWORD PTR [rbp-560] + add rax, rdx + mov QWORD PTR [rbp-568], rax + mov rdx, QWORD PTR [rbp-480] + mov rax, QWORD PTR [rbp-568] + add rax, rdx + mov QWORD PTR [rbp-576], rax + mov rdx, QWORD PTR [rbp-472] + mov rax, QWORD PTR [rbp-576] + add rax, rdx + mov QWORD PTR [rbp-584], rax + movabs rax, 288230376151711743 + and rax, QWORD PTR [rbp-584] + mov QWORD PTR [rbp-592], rax + mov rax, QWORD PTR [rbp-584] + shr rax, 58 + mov BYTE PTR [rbp-593], al + movzx edx, BYTE PTR [rbp-593] + mov rax, QWORD PTR [rbp-464] + add rax, rdx + mov QWORD PTR [rbp-608], rax + mov rdx, QWORD PTR [rbp-456] + mov rax, QWORD PTR [rbp-608] + add rax, rdx + mov QWORD PTR [rbp-616], rax + mov rdx, QWORD PTR [rbp-448] + mov rax, QWORD PTR [rbp-616] + add rax, rdx + mov QWORD PTR [rbp-624], rax + mov rdx, QWORD PTR [rbp-440] + mov rax, QWORD PTR [rbp-624] + add rax, rdx + mov QWORD PTR [rbp-632], rax + mov rdx, QWORD PTR [rbp-432] + mov rax, QWORD PTR [rbp-632] + add rax, rdx + mov QWORD PTR [rbp-640], rax + mov rdx, QWORD PTR [rbp-424] + mov rax, QWORD PTR [rbp-640] + add rax, rdx + mov QWORD PTR [rbp-648], rax + mov rdx, QWORD PTR [rbp-416] + mov rax, QWORD PTR [rbp-648] + add rax, rdx + mov QWORD PTR [rbp-656], rax + movabs rax, 288230376151711743 + and rax, QWORD PTR [rbp-656] + mov QWORD PTR [rbp-664], rax + mov rax, QWORD PTR [rbp-656] + shr rax, 58 + mov BYTE PTR 
[rbp-665], al + movzx edx, BYTE PTR [rbp-665] + mov rax, QWORD PTR [rbp-408] + add rax, rdx + mov QWORD PTR [rbp-680], rax + mov rdx, QWORD PTR [rbp-400] + mov rax, QWORD PTR [rbp-680] + add rax, rdx + mov QWORD PTR [rbp-688], rax + mov rdx, QWORD PTR [rbp-392] + mov rax, QWORD PTR [rbp-688] + add rax, rdx + mov QWORD PTR [rbp-696], rax + mov rdx, QWORD PTR [rbp-384] + mov rax, QWORD PTR [rbp-696] + add rax, rdx + mov QWORD PTR [rbp-704], rax + mov rdx, QWORD PTR [rbp-376] + mov rax, QWORD PTR [rbp-704] + add rax, rdx + mov QWORD PTR [rbp-712], rax + mov rdx, QWORD PTR [rbp-368] + mov rax, QWORD PTR [rbp-712] + add rax, rdx + mov QWORD PTR [rbp-720], rax + mov rdx, QWORD PTR [rbp-360] + mov rax, QWORD PTR [rbp-720] + add rax, rdx + mov QWORD PTR [rbp-728], rax + movabs rax, 288230376151711743 + and rax, QWORD PTR [rbp-728] + mov QWORD PTR [rbp-736], rax + mov rax, QWORD PTR [rbp-728] + shr rax, 58 + mov BYTE PTR [rbp-737], al + movzx edx, BYTE PTR [rbp-737] + mov rax, QWORD PTR [rbp-352] + add rax, rdx + mov QWORD PTR [rbp-752], rax + mov rdx, QWORD PTR [rbp-344] + mov rax, QWORD PTR [rbp-752] + add rax, rdx + mov QWORD PTR [rbp-760], rax + mov rdx, QWORD PTR [rbp-336] + mov rax, QWORD PTR [rbp-760] + add rax, rdx + mov QWORD PTR [rbp-768], rax + mov rdx, QWORD PTR [rbp-328] + mov rax, QWORD PTR [rbp-768] + add rax, rdx + mov QWORD PTR [rbp-776], rax + mov rdx, QWORD PTR [rbp-320] + mov rax, QWORD PTR [rbp-776] + add rax, rdx + mov QWORD PTR [rbp-784], rax + mov rdx, QWORD PTR [rbp-312] + mov rax, QWORD PTR [rbp-784] + add rax, rdx + mov QWORD PTR [rbp-792], rax + mov rdx, QWORD PTR [rbp-304] + mov rax, QWORD PTR [rbp-792] + add rax, rdx + mov QWORD PTR [rbp-800], rax + movzx edx, BYTE PTR [rbp-289] + mov rax, QWORD PTR [rbp-288] + add rax, rdx + mov QWORD PTR [rbp-808], rax + mov rdx, QWORD PTR [rbp-280] + mov rax, QWORD PTR [rbp-808] + add rax, rdx + mov QWORD PTR [rbp-816], rax + mov rdx, QWORD PTR [rbp-272] + mov rax, QWORD PTR [rbp-816] + add rax, rdx + mov 
QWORD PTR [rbp-824], rax + mov rdx, QWORD PTR [rbp-264] + mov rax, QWORD PTR [rbp-824] + add rax, rdx + mov QWORD PTR [rbp-832], rax + mov rdx, QWORD PTR [rbp-256] + mov rax, QWORD PTR [rbp-832] + add rax, rdx + mov QWORD PTR [rbp-840], rax + mov rdx, QWORD PTR [rbp-248] + mov rax, QWORD PTR [rbp-840] + add rax, rdx + mov QWORD PTR [rbp-848], rax + mov rdx, QWORD PTR [rbp-240] + mov rax, QWORD PTR [rbp-848] + add rax, rdx + mov QWORD PTR [rbp-856], rax + movabs rax, 288230376151711743 + and rax, QWORD PTR [rbp-856] + mov QWORD PTR [rbp-864], rax + mov rax, QWORD PTR [rbp-856] + shr rax, 58 + mov BYTE PTR [rbp-865], al + movzx edx, BYTE PTR [rbp-865] + mov rax, QWORD PTR [rbp-232] + add rax, rdx + mov QWORD PTR [rbp-880], rax + mov rdx, QWORD PTR [rbp-224] + mov rax, QWORD PTR [rbp-880] + add rax, rdx + mov QWORD PTR [rbp-888], rax + mov rdx, QWORD PTR [rbp-216] + mov rax, QWORD PTR [rbp-888] + add rax, rdx + mov QWORD PTR [rbp-896], rax + mov rdx, QWORD PTR [rbp-208] + mov rax, QWORD PTR [rbp-896] + add rax, rdx + mov QWORD PTR [rbp-904], rax + mov rdx, QWORD PTR [rbp-200] + mov rax, QWORD PTR [rbp-904] + add rax, rdx + mov QWORD PTR [rbp-912], rax + mov rdx, QWORD PTR [rbp-192] + mov rax, QWORD PTR [rbp-912] + add rax, rdx + mov QWORD PTR [rbp-920], rax + mov rdx, QWORD PTR [rbp-184] + mov rax, QWORD PTR [rbp-920] + add rax, rdx + mov QWORD PTR [rbp-928], rax + movabs rax, 288230376151711743 + and rax, QWORD PTR [rbp-928] + mov QWORD PTR [rbp-936], rax + mov rax, QWORD PTR [rbp-928] + shr rax, 58 + mov BYTE PTR [rbp-937], al + movzx edx, BYTE PTR [rbp-937] + mov rax, QWORD PTR [rbp-176] + add rax, rdx + mov QWORD PTR [rbp-952], rax + mov rdx, QWORD PTR [rbp-168] + mov rax, QWORD PTR [rbp-952] + add rax, rdx + mov QWORD PTR [rbp-960], rax + mov rdx, QWORD PTR [rbp-160] + mov rax, QWORD PTR [rbp-960] + add rax, rdx + mov QWORD PTR [rbp-968], rax + mov rdx, QWORD PTR [rbp-152] + mov rax, QWORD PTR [rbp-968] + add rax, rdx + mov QWORD PTR [rbp-976], rax + mov rdx, 
QWORD PTR [rbp-144] + mov rax, QWORD PTR [rbp-976] + add rax, rdx + mov QWORD PTR [rbp-984], rax + mov rdx, QWORD PTR [rbp-136] + mov rax, QWORD PTR [rbp-984] + add rax, rdx + mov QWORD PTR [rbp-992], rax + mov rdx, QWORD PTR [rbp-128] + mov rax, QWORD PTR [rbp-992] + add rax, rdx + mov QWORD PTR [rbp-1000], rax + movabs rax, 288230376151711743 + and rax, QWORD PTR [rbp-1000] + mov QWORD PTR [rbp-1008], rax + mov rax, QWORD PTR [rbp-1000] + shr rax, 58 + mov BYTE PTR [rbp-1009], al + movzx edx, BYTE PTR [rbp-1009] + mov rax, QWORD PTR [rbp-120] + add rax, rdx + mov QWORD PTR [rbp-1024], rax + mov rdx, QWORD PTR [rbp-112] + mov rax, QWORD PTR [rbp-1024] + add rax, rdx + mov QWORD PTR [rbp-1032], rax + mov rdx, QWORD PTR [rbp-104] + mov rax, QWORD PTR [rbp-1032] + add rax, rdx + mov QWORD PTR [rbp-1040], rax + mov rdx, QWORD PTR [rbp-96] + mov rax, QWORD PTR [rbp-1040] + add rax, rdx + mov QWORD PTR [rbp-1048], rax + mov rdx, QWORD PTR [rbp-88] + mov rax, QWORD PTR [rbp-1048] + add rax, rdx + mov QWORD PTR [rbp-1056], rax + mov rdx, QWORD PTR [rbp-80] + mov rax, QWORD PTR [rbp-1056] + add rax, rdx + mov QWORD PTR [rbp-1064], rax + mov rdx, QWORD PTR [rbp-72] + mov rax, QWORD PTR [rbp-1064] + add rax, rdx + mov QWORD PTR [rbp-1072], rax + movzx edx, BYTE PTR [rbp-57] + mov rax, QWORD PTR [rbp-56] + add rax, rdx + mov QWORD PTR [rbp-1080], rax + mov rdx, QWORD PTR [rbp-48] + mov rax, QWORD PTR [rbp-1080] + add rax, rdx + mov QWORD PTR [rbp-1088], rax + mov rdx, QWORD PTR [rbp-40] + mov rax, QWORD PTR [rbp-1088] + add rax, rdx + mov QWORD PTR [rbp-1096], rax + mov rdx, QWORD PTR [rbp-32] + mov rax, QWORD PTR [rbp-1096] + add rax, rdx + mov QWORD PTR [rbp-1104], rax + mov rdx, QWORD PTR [rbp-24] + mov rax, QWORD PTR [rbp-1104] + add rax, rdx + mov QWORD PTR [rbp-1112], rax + mov rdx, QWORD PTR [rbp-16] + mov rax, QWORD PTR [rbp-1112] + add rax, rdx + mov QWORD PTR [rbp-1120], rax + mov rdx, QWORD PTR [rbp-8] + mov rax, QWORD PTR [rbp-1120] + add rax, rdx + mov QWORD PTR 
[rbp-1128], rax + mov rax, QWORD PTR [rbp-1144] + mov rdx, QWORD PTR [rbp-592] + mov QWORD PTR [rax], rdx + mov rax, QWORD PTR [rbp-1144] + lea rdx, [rax+8] + mov rax, QWORD PTR [rbp-664] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-1144] + lea rdx, [rax+16] + mov rax, QWORD PTR [rbp-736] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-1144] + lea rdx, [rax+24] + mov rax, QWORD PTR [rbp-800] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-1144] + lea rdx, [rax+32] + mov rax, QWORD PTR [rbp-864] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-1144] + lea rdx, [rax+40] + mov rax, QWORD PTR [rbp-936] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-1144] + lea rdx, [rax+48] + mov rax, QWORD PTR [rbp-1008] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-1144] + lea rdx, [rax+56] + mov rax, QWORD PTR [rbp-1072] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-1144] + lea rdx, [rax+64] + mov rax, QWORD PTR [rbp-1128] + mov QWORD PTR [rdx], rax + nop + leave + ret diff --git a/fiat-amd64/fiat_poly1305_from_bytes/clang_19_1_0_O0.asm b/fiat-amd64/fiat_poly1305_from_bytes/clang_19_1_0_O0.asm new file mode 100644 index 0000000000..1e03b9a796 --- /dev/null +++ b/fiat-amd64/fiat_poly1305_from_bytes/clang_19_1_0_O0.asm @@ -0,0 +1,149 @@ + .globl _Z24fiat_poly1305_from_bytesPmPKh +_Z24fiat_poly1305_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 184 + mov qword ptr [rbp - 8], rdi + mov qword ptr [rbp - 16], rsi + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 16] + shl rax, 41 + mov qword ptr [rbp - 24], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 15] + shl rax, 33 + mov qword ptr [rbp - 32], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 14] + shl rax, 25 + mov qword ptr [rbp - 40], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 13] + shl rax, 17 + mov qword ptr [rbp - 48], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 12] + shl rax, 9 + mov qword ptr 
[rbp - 56], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 11] + shl rax + mov qword ptr [rbp - 64], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 10] + shl rax, 36 + mov qword ptr [rbp - 72], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 9] + shl rax, 28 + mov qword ptr [rbp - 80], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 8] + shl rax, 20 + mov qword ptr [rbp - 88], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 7] + shl rax, 12 + mov qword ptr [rbp - 96], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 6] + shl rax, 4 + mov qword ptr [rbp - 104], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 5] + shl rax, 40 + mov qword ptr [rbp - 112], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 4] + shl rax, 32 + mov qword ptr [rbp - 120], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 3] + shl rax, 24 + mov qword ptr [rbp - 128], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 2] + shl rax, 16 + mov qword ptr [rbp - 136], rax + mov rax, qword ptr [rbp - 16] + movzx eax, byte ptr [rax + 1] + shl rax, 8 + mov qword ptr [rbp - 144], rax + mov rax, qword ptr [rbp - 16] + mov al, byte ptr [rax] + mov byte ptr [rbp - 145], al + mov rax, qword ptr [rbp - 144] + movzx ecx, byte ptr [rbp - 145] + add rax, rcx + mov qword ptr [rbp - 160], rax + mov rax, qword ptr [rbp - 136] + add rax, qword ptr [rbp - 160] + mov qword ptr [rbp - 168], rax + mov rax, qword ptr [rbp - 128] + add rax, qword ptr [rbp - 168] + mov qword ptr [rbp - 176], rax + mov rax, qword ptr [rbp - 120] + add rax, qword ptr [rbp - 176] + mov qword ptr [rbp - 184], rax + mov rax, qword ptr [rbp - 112] + add rax, qword ptr [rbp - 184] + mov qword ptr [rbp - 192], rax + movabs rax, 17592186044415 + and rax, qword ptr [rbp - 192] + mov qword ptr [rbp - 200], rax + mov rax, qword ptr [rbp - 192] + shr rax, 44 + mov byte ptr [rbp - 201], al 
+ mov rax, qword ptr [rbp - 104] + movzx ecx, byte ptr [rbp - 201] + add rax, rcx + mov qword ptr [rbp - 216], rax + mov rax, qword ptr [rbp - 96] + add rax, qword ptr [rbp - 216] + mov qword ptr [rbp - 224], rax + mov rax, qword ptr [rbp - 88] + add rax, qword ptr [rbp - 224] + mov qword ptr [rbp - 232], rax + mov rax, qword ptr [rbp - 80] + add rax, qword ptr [rbp - 232] + mov qword ptr [rbp - 240], rax + mov rax, qword ptr [rbp - 72] + add rax, qword ptr [rbp - 240] + mov qword ptr [rbp - 248], rax + movabs rax, 8796093022207 + and rax, qword ptr [rbp - 248] + mov qword ptr [rbp - 256], rax + mov rax, qword ptr [rbp - 248] + shr rax, 43 + mov byte ptr [rbp - 257], al + mov rax, qword ptr [rbp - 64] + movzx ecx, byte ptr [rbp - 257] + add rax, rcx + mov qword ptr [rbp - 272], rax + mov rax, qword ptr [rbp - 56] + add rax, qword ptr [rbp - 272] + mov qword ptr [rbp - 280], rax + mov rax, qword ptr [rbp - 48] + add rax, qword ptr [rbp - 280] + mov qword ptr [rbp - 288], rax + mov rax, qword ptr [rbp - 40] + add rax, qword ptr [rbp - 288] + mov qword ptr [rbp - 296], rax + mov rax, qword ptr [rbp - 32] + add rax, qword ptr [rbp - 296] + mov qword ptr [rbp - 304], rax + mov rax, qword ptr [rbp - 24] + add rax, qword ptr [rbp - 304] + mov qword ptr [rbp - 312], rax + mov rcx, qword ptr [rbp - 200] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax], rcx + mov rcx, qword ptr [rbp - 256] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 8], rcx + mov rcx, qword ptr [rbp - 312] + mov rax, qword ptr [rbp - 8] + mov qword ptr [rax + 16], rcx + add rsp, 184 + pop rbp + ret \ No newline at end of file diff --git a/fiat-amd64/fiat_poly1305_from_bytes/gcc_14_1_0.asm b/fiat-amd64/fiat_poly1305_from_bytes/gcc_14_1_0.asm new file mode 100644 index 0000000000..602fc6df34 --- /dev/null +++ b/fiat-amd64/fiat_poly1305_from_bytes/gcc_14_1_0.asm @@ -0,0 +1,196 @@ + .globl _Z24fiat_poly1305_from_bytesPmPKh +_Z24fiat_poly1305_from_bytesPmPKh: + push rbp + mov rbp, rsp + sub rsp, 200 
+ mov QWORD PTR [rbp-312], rdi + mov QWORD PTR [rbp-320], rsi + mov rax, QWORD PTR [rbp-320] + add rax, 16 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 41 + mov QWORD PTR [rbp-8], rax + mov rax, QWORD PTR [rbp-320] + add rax, 15 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 33 + mov QWORD PTR [rbp-16], rax + mov rax, QWORD PTR [rbp-320] + add rax, 14 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 25 + mov QWORD PTR [rbp-24], rax + mov rax, QWORD PTR [rbp-320] + add rax, 13 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 17 + mov QWORD PTR [rbp-32], rax + mov rax, QWORD PTR [rbp-320] + add rax, 12 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 9 + mov QWORD PTR [rbp-40], rax + mov rax, QWORD PTR [rbp-320] + add rax, 11 + movzx eax, BYTE PTR [rax] + movzx eax, al + add rax, rax + mov QWORD PTR [rbp-48], rax + mov rax, QWORD PTR [rbp-320] + add rax, 10 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 36 + mov QWORD PTR [rbp-56], rax + mov rax, QWORD PTR [rbp-320] + add rax, 9 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 28 + mov QWORD PTR [rbp-64], rax + mov rax, QWORD PTR [rbp-320] + add rax, 8 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 20 + mov QWORD PTR [rbp-72], rax + mov rax, QWORD PTR [rbp-320] + add rax, 7 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 12 + mov QWORD PTR [rbp-80], rax + mov rax, QWORD PTR [rbp-320] + add rax, 6 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 4 + mov QWORD PTR [rbp-88], rax + mov rax, QWORD PTR [rbp-320] + add rax, 5 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 40 + mov QWORD PTR [rbp-96], rax + mov rax, QWORD PTR [rbp-320] + add rax, 4 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 32 + mov QWORD PTR [rbp-104], rax + mov rax, QWORD PTR [rbp-320] + add rax, 3 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 24 + mov QWORD PTR [rbp-112], rax + mov rax, QWORD PTR [rbp-320] + add rax, 2 + movzx eax, BYTE PTR [rax] + movzx 
eax, al + sal rax, 16 + mov QWORD PTR [rbp-120], rax + mov rax, QWORD PTR [rbp-320] + add rax, 1 + movzx eax, BYTE PTR [rax] + movzx eax, al + sal rax, 8 + mov QWORD PTR [rbp-128], rax + mov rax, QWORD PTR [rbp-320] + movzx eax, BYTE PTR [rax] + mov BYTE PTR [rbp-129], al + movzx edx, BYTE PTR [rbp-129] + mov rax, QWORD PTR [rbp-128] + add rax, rdx + mov QWORD PTR [rbp-144], rax + mov rdx, QWORD PTR [rbp-120] + mov rax, QWORD PTR [rbp-144] + add rax, rdx + mov QWORD PTR [rbp-152], rax + mov rdx, QWORD PTR [rbp-112] + mov rax, QWORD PTR [rbp-152] + add rax, rdx + mov QWORD PTR [rbp-160], rax + mov rdx, QWORD PTR [rbp-104] + mov rax, QWORD PTR [rbp-160] + add rax, rdx + mov QWORD PTR [rbp-168], rax + mov rdx, QWORD PTR [rbp-96] + mov rax, QWORD PTR [rbp-168] + add rax, rdx + mov QWORD PTR [rbp-176], rax + movabs rax, 17592186044415 + and rax, QWORD PTR [rbp-176] + mov QWORD PTR [rbp-184], rax + mov rax, QWORD PTR [rbp-176] + shr rax, 44 + mov BYTE PTR [rbp-185], al + movzx edx, BYTE PTR [rbp-185] + mov rax, QWORD PTR [rbp-88] + add rax, rdx + mov QWORD PTR [rbp-200], rax + mov rdx, QWORD PTR [rbp-80] + mov rax, QWORD PTR [rbp-200] + add rax, rdx + mov QWORD PTR [rbp-208], rax + mov rdx, QWORD PTR [rbp-72] + mov rax, QWORD PTR [rbp-208] + add rax, rdx + mov QWORD PTR [rbp-216], rax + mov rdx, QWORD PTR [rbp-64] + mov rax, QWORD PTR [rbp-216] + add rax, rdx + mov QWORD PTR [rbp-224], rax + mov rdx, QWORD PTR [rbp-56] + mov rax, QWORD PTR [rbp-224] + add rax, rdx + mov QWORD PTR [rbp-232], rax + movabs rax, 8796093022207 + and rax, QWORD PTR [rbp-232] + mov QWORD PTR [rbp-240], rax + mov rax, QWORD PTR [rbp-232] + shr rax, 43 + mov BYTE PTR [rbp-241], al + movzx edx, BYTE PTR [rbp-241] + mov rax, QWORD PTR [rbp-48] + add rax, rdx + mov QWORD PTR [rbp-256], rax + mov rdx, QWORD PTR [rbp-40] + mov rax, QWORD PTR [rbp-256] + add rax, rdx + mov QWORD PTR [rbp-264], rax + mov rdx, QWORD PTR [rbp-32] + mov rax, QWORD PTR [rbp-264] + add rax, rdx + mov QWORD PTR [rbp-272], rax 
+ mov rdx, QWORD PTR [rbp-24] + mov rax, QWORD PTR [rbp-272] + add rax, rdx + mov QWORD PTR [rbp-280], rax + mov rdx, QWORD PTR [rbp-16] + mov rax, QWORD PTR [rbp-280] + add rax, rdx + mov QWORD PTR [rbp-288], rax + mov rdx, QWORD PTR [rbp-8] + mov rax, QWORD PTR [rbp-288] + add rax, rdx + mov QWORD PTR [rbp-296], rax + mov rax, QWORD PTR [rbp-312] + mov rdx, QWORD PTR [rbp-184] + mov QWORD PTR [rax], rdx + mov rax, QWORD PTR [rbp-312] + lea rdx, [rax+8] + mov rax, QWORD PTR [rbp-240] + mov QWORD PTR [rdx], rax + mov rax, QWORD PTR [rbp-312] + lea rdx, [rax+16] + mov rax, QWORD PTR [rbp-296] + mov QWORD PTR [rdx], rax + nop + leave + ret \ No newline at end of file