From be17da7bec443bc084f14a2d7d37a07d2ef6ffdd Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 26 Aug 2024 17:46:00 +0900 Subject: [PATCH] add vmulUnitAdd --- src/gen_bint_x64.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/gen_bint_x64.py b/src/gen_bint_x64.py index cb9fa3e4..6504584b 100644 --- a/src/gen_bint_x64.py +++ b/src/gen_bint_x64.py @@ -191,7 +191,7 @@ def vmulL(z, x, y): def vmulH(z, x, y): vpmadd52huq(z, x, y) -# [H:z] = x[] * y +# z[0:N+1] = x[0:N] * y def vmulUnit(z, px, y, N, H, t): vpxorq(z[0], z[0], z[0]) vmovdqa64(t, ptr(px)) @@ -202,9 +202,29 @@ def vmulUnit(z, px, y, N, H, t): vmovdqa64(z[i], H) vmovdqa64(t, ptr(px+i*64)) vmulL(z[i], t, y) - vpxorq(H, H, H) - vmulH(H, t, y) + if i < N-1: + vpxorq(H, H, H) + vmulH(H, t, y) + else: + vpxorq(z[N], z[N], z[N]) + vmulH(z[N], t, y) +# z[0:N+1] = z[0:N] + x[0:N] * y for N >= 2 (H is scratch) +def vmulUnitAdd(z, px, y, N, H, t): + vmovdqa64(t, ptr(px)) + vmulL(z[0], t, y) + vpxorq(H, H, H) + vmulH(H, t, y) + for i in range(1, N): + vmovdqa64(t, ptr(px+i*64)) + vmulL(z[i], t, y) + vpaddq(z[i], z[i], H) + if i < N-1: + vpxorq(H, H, H) + vmulH(H, t, y) + else: + vpxorq(z[N], z[N], z[N]) + vmulH(z[N], t, y) def msm_data(mont): makeLabel(C_p)