[doc] update GLV algo for SIMD

herumi · May 15, 2024 · d1efd49 · d1efd49
1 parent b3f5713
commit d1efd49
Showing 1 changed file with 38 additions and 16 deletions.
diff --git a/misc/internal.md b/misc/internal.md
@@ -45,33 +45,42 @@ bit_length|64|128|255|255|128
 
 ### Split function
 ```python
+adj = False
 def split(x):
     b = (x * v) >> s
     a = x - b * L
+    if adj:
+      if a >= L:
+        a -= L
+        b += 1
     return (a, b)
 ```
+- x in [0, r-1]
 - a + b L = x for (a, b) = split(x).
 
 ### Theorem
-0 <= a, b < H for all x in [0, M-r].
+0 <= a < 1.11 L < H and 0 <= b < L+1 for x in [0, r-1].
 
 ### Proof
 
 ```
-Let r0 := L S % r, then S=v L + r0 and r0 in [0, L-1].
+Let r0 := L S % r, then S=v L + r0 and r0 in [0, L-1]. In fact, r0 ~ 0.11 L.
 Let r1 := x v % S, then x v = b S + r1 and r1 in [0, S-1].
 ```
 
 ```
-b <=  xv / S < (M-r) (S/L)/S = (M-r)/L < H.
+b <=  xv / S < (r-1) (S/L)/S = (r-1)/L = L+1.
 ```
 
 ```
 aS = (x - bL)S = xS - bSL = xS - (xv - r1)L = x(S - vL) + r1 L = r0 x + r1 L
-     <= r0 (M-r) + (S-1)L < S H.
+     <= r0 (r-1) + (S-1)L = S L + (r-1)r0 - L.
+a <= L + ((r-1)r0 - L)/S
+((r-1)r0 - L)/S ~ 0.10016 L < 0.11 L.
 ```
-Then, a < H.
-So for x in [0, M-1], set x = x - r if x >= H and apply split() to x.
+### Remark
+If adj is true, then a is in [0, L-1].
+
 
 ## window size
 - 128-bit (Fr is 256 bit and use GLV method)
@@ -88,14 +97,15 @@ f(w)|130|68|51|48|58|86
 
 argmin f(w) = 4
 
-## Use projective coordinates
+## Selection of coordinates
 
-- psuedo code of GLV method
+### psuedo code of GLV method
 
 ```python
 def mul(P, x):
-  (a, b) = split(x)
-  # a, b < 1<<128
+  assert(0 <= x < r)
+  (a, b) = split(x) # x = a + b L
+  # a, b < H=1<<128
   w = 4
   for i in range(1<<w):
     tbl1[i] = P * i
@@ -105,15 +115,27 @@ def mul(P, x):
   Q = 0
   for i in range(128//w):
     for j in range(w):
-      Q = dbl(Q)
+      Q = dbl(Q)         ### AAA
     j1 = (a >> (w*i)) & mask
-    j2 = (b >> (w*i)) & mask ### AAA
-    Q = add(Q, tbl1[j1])
-    Q = add(Q, tbl2[j2])
+    j2 = (b >> (w*i)) & mask
+    Q = add(Q, tbl2[j2]) # ADD1
+    Q = add(Q, tbl1[j1]) # ADD2
   return Q
 ```
-The values of tbl1[i] are 0, P, ..., 15P, and the values of tbl2[i] are 0, LP, ... , 15LP.
-Since L is odd and Q is a multiple of 16 just before AAA, Q != tbl1[j1] and Q != tbl2[j2]. So we can omit the ehckd of x == y in add(x, y).
+Note that the value of tbl2 is added first.
+
+### Theorem
+We can use Jacobi additive formula add(P, Q) assuming P != Q and P+Q != 0.
+
+Proof.
+
+During the calculation, Q is monotonic increase and always in [0, P, ..., (r-1)P].
+
+- ADD1 : tbl2[] is in [0, L P, ..., 15 L P] and L is odd.
+After computing AAA, Q is a multiple of 16 P, so Q != tbl2[j2].
+- ADD2 : tbl1[] is in [0, P, ..., 15 P].
+After computing ADD1, if the immediately preceding tbl2[j2] is 0, then then Q is a multiple of 16 P, so Q != tbl1[j1].
+Otherwise, Q is bigger than L P, so Q != tbl1[j1].
 
 ## Jacobi and Proj
 `sqr` is equal to `mul` on AVX-512.