Skip to content

Commit

Permalink
_
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed May 31, 2024
1 parent cf4c173 commit b3239ee
Showing 1 changed file with 80 additions and 48 deletions.
128 changes: 80 additions & 48 deletions src/msm_avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,17 @@ inline uint8_t cvtToInt(const Vmask& v)

inline void dump(const Vmask& v, const char *msg = nullptr)
{
mcl::bint::dump(&v, sizeof(v), msg);
if (msg) printf("%s ", msg);
uint64_t x = cvtToInt(v);
for (size_t i = 0; i < 8; i++) {
putchar('0' + ((x>>(7-i))&1));
}
putchar('\n');
}

inline void dump(const Vec& v, const char *msg = nullptr)
{
mcl::bint::dump((const uint64_t*)&v, sizeof(v)/sizeof(uint64_t), msg);
}

template<size_t N, int w = W>
Expand Down Expand Up @@ -87,6 +97,13 @@ inline Vec vzero()
return _mm512_setzero_epi32();
}

inline Vmask mzero()
{
Vmask v;
memset(&v, 0, sizeof(v));
return v;
}

inline Vec vone()
{
return _mm512_set1_epi32(1);
Expand Down Expand Up @@ -160,11 +177,21 @@ inline Vec vadd(const Vec& a, const Vec& b)
return _mm512_add_epi64(a, b);
}

inline Vec vadd(const Vmask& v, const Vec& a, const Vec& b)
{
return _mm512_mask_add_epi64(a, v, a, b);
}

inline Vec vsub(const Vec& a, const Vec& b)
{
return _mm512_sub_epi64(a, b);
}

inline Vec vsub(const Vmask& v, const Vec& a, const Vec& b)
{
return _mm512_mask_sub_epi64(a, v, a, b);
}

inline Vec vpsrlq(const Vec& a, size_t b)
{
return _mm512_srli_epi64(a, int(b));
Expand Down Expand Up @@ -911,7 +938,7 @@ inline void addJacobiMixedNoCheck(E& R, const E& P, const E& Q)
}

// 12M+4S+7A
// assume P.x != Q.x, P != Q
// P == Q or P == -Q then R = 0, so assume P != Q.
template<class E>
inline void addJacobiNoCheck(E& R, const E& P, const E& Q)
{
Expand Down Expand Up @@ -1106,7 +1133,7 @@ struct EcM {
Q.y = P.y;
Q.z = P.z;
}
#if 1
#if 0
// Treat idx as an unsigned integer
template<bool isProj=true, bool mixed=false>
static void mulGLV(EcM& Q, const EcM& P, const Vec y[4])
Expand Down Expand Up @@ -1162,42 +1189,35 @@ struct EcM {
}
#else
template<size_t bitLen, size_t w>
static void makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2])
static Vmask makeNAFtbl(Vec *idxTbl, Vmask *negTbl, const Vec a[2])
{
const Vec vmask = vpbroadcastq((1<<w)-1);
const Vec W = vpbroadcastq(1<<w);
const Vec H = vpbroadcastq(1<<(w-1));
const size_t remain = bitLen % w;
const Vec one = vpbroadcastq(1);
size_t pos = 0;
Vec CF = vzero();
size_t i = 0;
while (pos < bitLen) {
Vmask CF = mzero();
const size_t n = (bitLen+w-1)/w;
for (size_t i = 0; i < n; i++) {
Vec idx = getUnitAt(a, 2, pos);
if (pos == 0 && remain) {
// always CF = 0
idx = vand(idx, vpbroadcastq((1<<remain)-1));
negTbl[i] = vcmpge(idx, H); // = 0 because always idx < H
pos = remain;
} else {
idx = vadd(idx, CF);
CF = vpsrlq(idx, w);
idx = vand(idx, vmask);
negTbl[i] = vcmpge(idx, H);
idx = vselect(negTbl[i], vsub(W, idx), idx); // idx >= H ? W - idx : idx;
pos += w;
}
idx = vand(idx, vmask);
idx = vadd(CF, idx, one);
Vec masked = vand(idx, vmask);
negTbl[i] = vcmpge(masked, H);
idx = vselect(negTbl[i], vsub(W, masked), masked); // idx >= H ? W - idx : idx;
idxTbl[i] = idx;
i++;
CF = vcmpge(idx, W);
CF = mor(negTbl[i], CF);
pos += w;
}
assert(i == (bitLen+w-1)/w);
return CF;
}
// Treat idx as a signed integer
template<bool isProj=true, bool mixed=false>
static void mulGLV(EcM& Q, const EcM& P, const Vec y[4])
{
const size_t w = 4;
const size_t tblN = (1<<(w-1))+1;
// const size_t tblN = 1<<w;
const size_t w = 5;
const size_t tblN = (1<<(w-1))+1; // [0, 2^(w-1)]
Vec a[2], b[2];
EcM tbl1[tblN], tbl2[tblN];
makeTable<w, isProj, mixed>(tbl1, P);
Expand All @@ -1219,28 +1239,29 @@ struct EcM {
const size_t n = (bitLen + w-1)/w;
Vec aTbl[n], bTbl[n];
Vmask aNegTbl[n], bNegTbl[n];
makeNAFtbl<bitLen, w>(aTbl, aNegTbl, a);
makeNAFtbl<bitLen, w>(bTbl, bNegTbl, b);
Vmask CF1 = makeNAFtbl<bitLen, w>(aTbl, aNegTbl, a);
Vmask CF2 = makeNAFtbl<bitLen, w>(bTbl, bNegTbl, b);

const size_t remain = bitLen % w;
bool first = true;
for (int i = int(n)-1; i >= 0; i--) {
size_t dblN = (i == 0 && remain) ? remain : w;
if (!first) for (size_t k = 0; k < dblN; k++) EcM::dbl<isProj>(Q, Q);
assert(cvtToInt(CF1) == 0);
assert(cvtToInt(CF2) == 0);
(void)CF1;
(void)CF2;
for (size_t i = 0; i < n; i++) {
if (i > 0) for (size_t k = 0; k < w; k++) EcM::dbl<isProj>(Q, Q);
const size_t pos = n-1-i;

EcM T;
Vec idx = bTbl[i];
Vec idx = bTbl[pos];
T.gather(tbl2, idx);
T.y = FpM::select(bNegTbl[i], T.y.neg(), T.y);
if (first) {
T.y = FpM::select(bNegTbl[pos], T.y.neg(), T.y);
if (i == 0) {
Q = T;
first = false;
} else {
add<isProj, mixed>(Q, Q, T);
}
idx = aTbl[i];
idx = aTbl[pos];
T.gather(tbl1, idx);
T.y = FpM::select(aNegTbl[i], T.y.neg(), T.y);
T.y = FpM::select(aNegTbl[pos], T.y.neg(), T.y);
add<isProj, mixed>(Q, Q, T);
}
}
Expand Down Expand Up @@ -1401,7 +1422,7 @@ void mulVecAVX512(Unit *_P, Unit *_x, const Unit *_y, size_t n)
void mulEachAVX512(Unit *_x, const Unit *_y, size_t n)
{
assert(n % 8 == 0);
const bool isProj = false;
const bool isProj = true;
const bool mixed = true;
mcl::msm::G1A *x = (mcl::msm::G1A*)_x;
const mcl::msm::FrA *y = (const mcl::msm::FrA*)_y;
Expand Down Expand Up @@ -1538,7 +1559,7 @@ CYBOZU_TEST_AUTO(cmp)

CYBOZU_TEST_AUTO(op)
{
const size_t n = 8;
const size_t n = 8; // fixed
G1 P[n];
G1 Q[n];
G1 R[n];
Expand Down Expand Up @@ -1621,14 +1642,25 @@ CYBOZU_TEST_AUTO(op)
}
#if 1
// mulEachAVX512
for (size_t i = 0; i < n; i++) {
Q[i] = P[i];
G1::mul(R[i], P[i], x[i]);
}
mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n);
for (size_t i = 0; i < n; i++) {
CYBOZU_TEST_EQUAL(R[i], Q[i]);
for (int t = 0; t < 0x1000; t += 8) {
for (size_t i = 0; i < n; i++) {
Q[i] = P[i];
x[i] = t + i;
G1::mul(R[i], P[i], x[i]);
}
mcl::msm::mulEachAVX512((Unit*)Q, (const Unit*)x, n);
for (size_t i = 0; i < n; i++) {
CYBOZU_TEST_EQUAL(R[i], Q[i]);
#if 1
if (R[i] != Q[i]) {
printf("x[%zd]=%s\n", i, x[i].getStr(16).c_str());
printf("ok %s\n", R[i].getStr(mcl::IoEcAffine|16).c_str());
printf("ng %s\n", Q[i].getStr(mcl::IoEcAffine|16).c_str());
}
#endif
}
}
exit(1);
#endif
}

Expand Down

0 comments on commit b3239ee

Please sign in to comment.