Skip to content

Commit

Permalink
another mulEach
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed May 31, 2024
1 parent b3239ee commit 72634d4
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 42 deletions.
35 changes: 27 additions & 8 deletions misc/internal.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,7 @@ s = r.bit_length()
S = 1<<s
v = S // L
r0 = S % L
```

variables|z|L|r|S|v
-|-|-|-|-|-
bit_length|64|128|255|255|128


### Split function
```python
adj = False
def split(x):
b = (x * v) >> s
Expand All @@ -55,6 +47,11 @@ def split(x):
b += 1
return (a, b)
```

variables|z|L|r|S|v
-|-|-|-|-|-
bit_length|64|128|255|255|128

- x in [0, r-1]
- a + b L = x for (a, b) = split(x).

Expand Down Expand Up @@ -144,3 +141,25 @@ Otherwise, Q is bigger than L P, so Q != tbl1[j1].
-|-|-
Proj|12M+27A|8M+13A
Jacobi|16M+7A|7M+12A

## NAF (Non-Adjacent Form)

```python
def naf(x, w=3):
    """Return the width-w NAF (Non-Adjacent Form) digits of x, most significant first.

    Each digit is either 0 or an odd integer t with |t| < 2**(w-1), and
    sum(d * 2**i) over the digits (least significant digit at i = 0)
    reconstructs x. Any non-zero digit is followed by at least w-1 zeros.

    x : integer to recode; values below 1 yield an empty list.
    w : window width, must be >= 2 (w = 1 would never terminate because the
        only odd residue maps to -1 and x stops shrinking).
    """
    if w < 2:
        raise ValueError("w must be >= 2")
    tbl = []
    H = 2 ** (w - 1)  # half window: residues >= H are taken as negative digits
    W = H * 2         # full window size 2**w
    mask = W - 1
    while x >= 1:
        if x & 1:
            # Odd x: take the low w bits as a signed residue in [-H, H).
            t = x & mask
            if t >= H:
                t -= W
            # Removing t clears the low w bits, forcing the next w-1 digits to 0.
            x = x - t
        else:
            t = 0
        x = x >> 1
        tbl.append(t)
    # Digits were produced least significant first.
    tbl.reverse()
    return tbl
```
65 changes: 31 additions & 34 deletions src/msm_avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,11 @@ struct EcM {
tbl[1] = P;
dbl<isProj>(tbl[2], P);
for (size_t i = 3; i < tblN; i++) {
add<isProj, mixed>(tbl[i], tbl[i-1], P);
if (i & 1) {
add<isProj, mixed>(tbl[i], tbl[i-1], P);
} else {
dbl<isProj>(tbl[i], tbl[i/2]);
}
}
}
void gather(const EcM *tbl, Vec idx)
Expand All @@ -1133,8 +1137,9 @@ struct EcM {
Q.y = P.y;
Q.z = P.z;
}
#if 0
#if 1
// Treat idx as an unsigned integer
// 33.6M clk
template<bool isProj=true, bool mixed=false>
static void mulGLV(EcM& Q, const EcM& P, const Vec y[4])
{
Expand Down Expand Up @@ -1189,30 +1194,29 @@ struct EcM {
}
#else
template<size_t bitLen, size_t w>
static Vmask makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2])
static void makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2])
{
const Vec vmask = vpbroadcastq((1<<w)-1);
const Vec W = vpbroadcastq(1<<w);
const Vec F = vpbroadcastq(1<<w);
const Vec H = vpbroadcastq(1<<(w-1));
const Vec one = vpbroadcastq(1);
size_t pos = 0;
Vmask CF = mzero();
Vec CF = vzero();
const size_t n = (bitLen+w-1)/w;
for (size_t i = 0; i < n; i++) {
Vec idx = getUnitAt(a, 2, pos);
idx = vand(idx, vmask);
idx = vadd(CF, idx, one);
idx = vadd(idx, CF);
Vec masked = vand(idx, vmask);
negTbl[i] = vcmpge(masked, H);
idx = vselect(negTbl[i], vsub(W, masked), masked); // idx >= H ? W - idx : idx;
idxTbl[i] = idx;
CF = vcmpge(idx, W);
CF = mor(negTbl[i], CF);
idxTbl[i] = vselect(negTbl[i], vsub(F, masked), masked); // idx >= H ? F - idx : idx;
CF = vpsrlq(idx, w);
CF = vadd(negTbl[i], CF, one);
pos += w;
}
return CF;
}
// Treat idx as a signed integer
// 34.6M clk
template<bool isProj=true, bool mixed=false>
static void mulGLV(EcM& Q, const EcM& P, const Vec y[4])
{
Expand All @@ -1239,13 +1243,9 @@ struct EcM {
const size_t n = (bitLen + w-1)/w;
Vec aTbl[n], bTbl[n];
Vmask aNegTbl[n], bNegTbl[n];
Vmask CF1 = makeNAFtbl<bitLen, w>(aTbl, aNegTbl, a);
Vmask CF2 = makeNAFtbl<bitLen, w>(bTbl, bNegTbl, b);
makeNAFtbl<bitLen, w>(aTbl, aNegTbl, a);
makeNAFtbl<bitLen, w>(bTbl, bNegTbl, b);
assert(cvtToInt(CF1) == 0);
assert(cvtToInt(CF2) == 0);
(void)CF1;
(void)CF2;
for (size_t i = 0; i < n; i++) {
if (i > 0) for (size_t k = 0; k < w; k++) EcM::dbl<isProj>(Q, Q);
const size_t pos = n-1-i;
Expand Down Expand Up @@ -1422,7 +1422,7 @@ void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n)
void mulEachAVX512(Unit *_x, const Unit *_y, size_t n)
{
assert(n % 8 == 0);
const bool isProj = true;
const bool isProj = false;
const bool mixed = true;
mcl::msm::G1A *x = (mcl::msm::G1A*)_x;
const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y;
Expand Down Expand Up @@ -1642,25 +1642,22 @@ CYBOZU_TEST_AUTO(op)
}
#if 1
// mulEachAVX512
for (int t = 0; t < 0x1000; t += 8) {
for (size_t i = 0; i < n; i++) {
Q[i] = P[i];
x[i] = t + i;
G1::mul(R[i], P[i], x[i]);
}
mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n);
for (size_t i = 0; i < n; i++) {
CYBOZU_TEST_EQUAL(R[i], Q[i]);
#if 1
if (R[i] != Q[i]) {
printf("x[%zd]=%s\n", i, x[i].getStr(16).c_str());
printf("ok %s\n", R[i].getStr(mcl::IoEcAffine|16).c_str());
printf("ng %s\n", Q[i].getStr(mcl::IoEcAffine|16).c_str());
for (int mode = 0; mode < 2; mode++) {
for (int t = 0; t < 0x1000; t += 8) {
for (size_t i = 0; i < n; i++) {
Q[i] = P[i];
switch (mode) {
case 0: x[i] = t + i; break;
case 1: x[i].setByCSPRNG(rg); break;
}
G1::mul(R[i], P[i], x[i]);
}
mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n);
for (size_t i = 0; i < n; i++) {
CYBOZU_TEST_EQUAL(R[i], Q[i]);
}
#endif
}
}
exit(1);
#endif
}

Expand Down

0 comments on commit 72634d4

Please sign in to comment.