Skip to content

Commit

Permalink
gosthash2012: Import and merge MMX implementations
Browse files Browse the repository at this point in the history
Merged and fixed two MMX implementations. For example,
[1] uses SSE2 register types `__m128i',
[2] GCC's `mmintrin.h' defines `_mm_cvtsi64_m64' only for `__x86_64__',
    but we need MMX precisely for IA-32, since x86_64 already has SSE2 in
    its baseline.

Link: https://github.com/adegtyarev/streebog
Link: https://github.com/sjinks/php-stribog
Signed-off-by: Vitaly Chikunov <[email protected]>
  • Loading branch information
vt-alt committed Nov 30, 2021
1 parent 767c693 commit 36e84c3
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ set(GOST_HASH_2012_SOURCE_FILES
gosthash2012_const.h
gosthash2012_precalc.h
gosthash2012_ref.c
gosthash2012_mmx.c
gosthash2012_sse2.c
gosthash2012_sse41.c
)
Expand Down
5 changes: 5 additions & 0 deletions gosthash2012.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,16 @@ void g(union uint512_u *h, const union uint512_u * RESTRICT N,
if (__builtin_cpu_supports("sse2"))
return g_sse2(h, N, m);
# endif
# if defined __GOST3411_HAS_MMX__
if (__builtin_cpu_supports("mmx"))
return g_mmx(h, N, m);
# endif
# if defined __GOST3411_HAS_REF__
g_ref(h, N, m);
# endif
# if !defined __GOST3411_HAS_SSE41__ && \
!defined __GOST3411_HAS_SSE2__ && \
!defined __GOST3411_HAS_MMX__ && \
!defined __GOST3411_HAS_REF__
# error "No dynamic implementation of g() is selected."
# endif
Expand Down
7 changes: 7 additions & 0 deletions gosthash2012.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define __GOST3411_HAS_REF__

#if defined __x86_64__ || defined __i386__
# define __GOST3411_HAS_MMX__
# define __GOST3411_HAS_SSE2__
# define __GOST3411_HAS_SSE41__
#elif defined __SSE2__
Expand Down Expand Up @@ -42,6 +43,7 @@
* be disabled with -mno-sse2.
*/
# undef __GOST3411_HAS_REF__
# undef __GOST3411_HAS_MMX__
# endif
#endif

Expand Down Expand Up @@ -108,6 +110,11 @@ _internal
void g_ref(union uint512_u *h, const union uint512_u * RESTRICT N,
const union uint512_u * RESTRICT m);
#endif
#ifdef __GOST3411_HAS_MMX__
_internal _target("mmx")
void g_mmx(union uint512_u *h, const union uint512_u * RESTRICT N,
const union uint512_u * RESTRICT m);
#endif
#ifdef __GOST3411_HAS_SSE2__
_internal _target("sse2")
void g_sse2(union uint512_u *h, const union uint512_u * RESTRICT N,
Expand Down
143 changes: 143 additions & 0 deletions gosthash2012_mmx.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
* Copyright (c) 2013, Alexey Degtyarev <[email protected]>.
* Implementation fixed based on php-stribog:
* Copyright (c) 2013 Vladimir Kolesnikov.
* SPDX-License-Identifier: BSD-2-Clause AND MIT
* Copyright (c) 2021 Vitaly Chikunov <[email protected]>.
* All rights reserved.
*
* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+
*/

#include "gosthash2012.h"
#ifdef __GOST3411_HAS_MMX__

#include <mmintrin.h>

/*
 * XLPS is the name used by ROUND() and g_mmx() in this file; here it is
 * simply an alias for the XLPS32 macro defined further down.
 */
#define XLPS XLPS32

/*
 * X(x, y, z): XOR two 512-bit values word by word.
 *
 *   x, y - pointers to the operands (objects exposing a QWORD[8] member).
 *   z    - pointer to the destination.  It may point to the same object as
 *          x or y: every 64-bit word is read before the matching word of
 *          the destination is written.
 *
 * Each argument is expanded eight times, so pass expressions without side
 * effects.  The body is wrapped in do { } while (0) so that the macro
 * behaves as one statement (safe in an unbraced if/else), and every
 * argument is parenthesized so that expressions such as `&var' work.
 */
#define X(x, y, z) do { \
    (z)->QWORD[0] = (x)->QWORD[0] ^ (y)->QWORD[0]; \
    (z)->QWORD[1] = (x)->QWORD[1] ^ (y)->QWORD[1]; \
    (z)->QWORD[2] = (x)->QWORD[2] ^ (y)->QWORD[2]; \
    (z)->QWORD[3] = (x)->QWORD[3] ^ (y)->QWORD[3]; \
    (z)->QWORD[4] = (x)->QWORD[4] ^ (y)->QWORD[4]; \
    (z)->QWORD[5] = (x)->QWORD[5] ^ (y)->QWORD[5]; \
    (z)->QWORD[6] = (x)->QWORD[6] ^ (y)->QWORD[6]; \
    (z)->QWORD[7] = (x)->QWORD[7] ^ (y)->QWORD[7]; \
} while (0)

/*
 * XLOAD(x, y, mm0..mm7): load two 64-byte blocks and XOR them into eight
 * __m64 values.
 *
 *   x, y     - pointers to the two 64-byte inputs; each is reinterpreted
 *              as an array of eight __m64 words.
 *   mm0..mm7 - names of __m64 lvalues that receive x[k] ^ y[k].
 *
 * Wrapped in do { } while (0) so it acts as a single statement; x and y
 * are parenthesized so arbitrary pointer expressions may be passed.  The
 * temporaries carry an `xload_' prefix to avoid clashing with names at
 * the expansion site.
 */
#define XLOAD(x, y, mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7) do { \
    const __m64 *xload_px = (const __m64 *) &(x)[0]; \
    const __m64 *xload_py = (const __m64 *) &(y)[0]; \
    mm0 = _mm_xor_si64(xload_px[0], xload_py[0]); \
    mm1 = _mm_xor_si64(xload_px[1], xload_py[1]); \
    mm2 = _mm_xor_si64(xload_px[2], xload_py[2]); \
    mm3 = _mm_xor_si64(xload_px[3], xload_py[3]); \
    mm4 = _mm_xor_si64(xload_px[4], xload_py[4]); \
    mm5 = _mm_xor_si64(xload_px[5], xload_py[5]); \
    mm6 = _mm_xor_si64(xload_px[6], xload_py[6]); \
    mm7 = _mm_xor_si64(xload_px[7], xload_py[7]); \
} while (0)

/*
 * STORE(P, mm0..mm7): write eight __m64 values into P->QWORD[0..7].
 *
 *   P        - pointer to the destination object (must expose QWORD[8]).
 *   mm0..mm7 - __m64 values; each is converted with a same-size cast to
 *              unsigned long long (a GCC/Clang vector-extension cast).
 *
 * Wrapped in do { } while (0) so it acts as a single statement.  The
 * temporary is called `store_dst' because names beginning with a double
 * underscore are reserved for the implementation.
 */
#define STORE(P, mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7) do { \
    unsigned long long *store_dst = &(P)->QWORD[0]; \
    store_dst[0] = (unsigned long long)(mm0); \
    store_dst[1] = (unsigned long long)(mm1); \
    store_dst[2] = (unsigned long long)(mm2); \
    store_dst[3] = (unsigned long long)(mm3); \
    store_dst[4] = (unsigned long long)(mm4); \
    store_dst[5] = (unsigned long long)(mm5); \
    store_dst[6] = (unsigned long long)(mm6); \
    store_dst[7] = (unsigned long long)(mm7); \
} while (0)

/*
 * TRANSPOSE(mm0..mm7): in-place transpose of an 8x8 byte matrix held in
 * eight __m64 values (mmK is row K, byte J of a row is column J).
 * On return, byte K of mmJ holds what was byte J of mmK.
 *
 * The arguments must be names of __m64 lvalues; each is read and written
 * several times.  The order of the assignments in the last stage matters:
 * a register is only overwritten after every read of its old value.
 *
 * Wrapped in do { } while (0) so it acts as a single statement.
 */
#define TRANSPOSE(mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7) do { \
    __m64 tm0, tm1, tm2, tm3, tm4, tm5, tm6, tm7; \
    /* Stage 1: interleave bytes of row pairs (0,2) (1,3) (4,6) (5,7). */ \
    tm0 = _mm_unpacklo_pi8(mm0, mm2); \
    tm1 = _mm_unpackhi_pi8(mm0, mm2); \
    tm2 = _mm_unpacklo_pi8(mm1, mm3); \
    tm3 = _mm_unpackhi_pi8(mm1, mm3); \
    tm4 = _mm_unpacklo_pi8(mm4, mm6); \
    tm5 = _mm_unpackhi_pi8(mm4, mm6); \
    tm6 = _mm_unpacklo_pi8(mm5, mm7); \
    tm7 = _mm_unpackhi_pi8(mm5, mm7); \
    \
    /* Stage 2: interleave again, giving 4-byte column pieces */ \
    /* for rows 0-3 (mm0..mm3) and rows 4-7 (mm4..mm7).       */ \
    mm0 = _mm_unpacklo_pi8(tm0, tm2); \
    mm1 = _mm_unpackhi_pi8(tm0, tm2); \
    mm2 = _mm_unpacklo_pi8(tm1, tm3); \
    mm3 = _mm_unpackhi_pi8(tm1, tm3); \
    mm4 = _mm_unpacklo_pi8(tm4, tm6); \
    mm5 = _mm_unpackhi_pi8(tm4, tm6); \
    mm6 = _mm_unpacklo_pi8(tm5, tm7); \
    mm7 = _mm_unpackhi_pi8(tm5, tm7); \
    \
    /* Stage 3: join the upper and lower 4-byte pieces of each column. */ \
    tm0 = _mm_unpacklo_pi32(mm0, mm4); \
    tm1 = _mm_unpackhi_pi32(mm0, mm4); \
    tm2 = _mm_unpacklo_pi32(mm1, mm5); \
    tm3 = _mm_unpackhi_pi32(mm1, mm5); \
    mm4 = _mm_unpacklo_pi32(mm2, mm6); \
    mm5 = _mm_unpackhi_pi32(mm2, mm6); \
    mm6 = _mm_unpacklo_pi32(mm3, mm7); \
    mm7 = _mm_unpackhi_pi32(mm3, mm7); \
    mm0 = tm0; \
    mm1 = tm1; \
    mm2 = tm2; \
    mm3 = tm3; \
} while (0)

/*
 * XTRANSPOSE(x, y, z): compute x ^ y over 64 bytes, transpose the result
 * as an 8x8 byte matrix and store it through z.
 *
 *   x, y - pointers to the two 64-byte inputs (see XLOAD).
 *   z    - pointer to the destination object (see STORE).
 *
 * Wrapped in do { } while (0) so it acts as a single statement.  The
 * arguments are forwarded in parentheses so that any pointer expression
 * can be passed safely to the helper macros.
 */
#define XTRANSPOSE(x, y, z) do { \
    __m64 row0, row1, row2, row3, row4, row5, row6, row7; \
    XLOAD((x), (y), row0, row1, row2, row3, row4, row5, row6, row7); \
    TRANSPOSE(row0, row1, row2, row3, row4, row5, row6, row7); \
    STORE((z), row0, row1, row2, row3, row4, row5, row6, row7); \
} while (0)
/*
 * XLPS32(x, y, data): combined XOR + table-lookup step.
 *
 *   1. x ^ y is computed and byte-transposed into a local buffer
 *      (XTRANSPOSE), so `data' may alias x or y.
 *   2. For each of the 8 output words, the eight consecutive bytes of the
 *      buffer select one entry from each of Ax[0]..Ax[7]; the eight
 *      entries are XORed together and stored in data->QWORD[word].
 *
 *   x, y - pointers to the 64-byte inputs.
 *   data - pointer to the destination object (must expose QWORD[8]).
 *
 * Ax and ALIGN are declared outside this file; Ax is presumably the
 * precomputed lookup table from gosthash2012_precalc.h -- TODO confirm.
 *
 * Wrapped in do { } while (0) so it acts as a single statement; locals
 * carry an `xlps_' prefix to avoid clashing with caller names.
 */
#define XLPS32(x, y, data) do { \
    unsigned int xlps_i; \
    const unsigned char *xlps_p; \
    ALIGN(16) union uint512_u xlps_buf; \
    XTRANSPOSE((x), (y), (&xlps_buf)); \
    xlps_p = (const unsigned char *) &xlps_buf; \
    for (xlps_i = 0; xlps_i < 8; xlps_i++) { \
        __m64 xlps_acc = (__m64)(Ax[0][xlps_p[0]]); \
        xlps_acc = _mm_xor_si64(xlps_acc, (__m64)(Ax[1][xlps_p[1]])); \
        xlps_acc = _mm_xor_si64(xlps_acc, (__m64)(Ax[2][xlps_p[2]])); \
        xlps_acc = _mm_xor_si64(xlps_acc, (__m64)(Ax[3][xlps_p[3]])); \
        xlps_acc = _mm_xor_si64(xlps_acc, (__m64)(Ax[4][xlps_p[4]])); \
        xlps_acc = _mm_xor_si64(xlps_acc, (__m64)(Ax[5][xlps_p[5]])); \
        xlps_acc = _mm_xor_si64(xlps_acc, (__m64)(Ax[6][xlps_p[6]])); \
        xlps_acc = _mm_xor_si64(xlps_acc, (__m64)(Ax[7][xlps_p[7]])); \
        /* advance to the next group of eight bytes */ \
        xlps_p += 8; \
        (data)->QWORD[xlps_i] = (unsigned long long) xlps_acc; \
    } \
} while (0)

/*
 * ROUND(i, Ki, data): one round of the E() loop in g_mmx().
 *
 *   i    - round index, used to select the constant C[i] (C is declared
 *          outside this file).
 *   Ki   - pointer to the round key; replaced by XLPS(Ki, C[i]).
 *   data - pointer to the state; replaced by XLPS(new Ki, data).
 *
 * The key is updated first, so the state update uses the new key.
 * Wrapped in do { } while (0) so it acts as a single statement (it is
 * used as the body of a `for' loop), with all arguments parenthesized.
 */
#define ROUND(i, Ki, data) do { \
    XLPS((Ki), (&C[(i)]), (Ki)); \
    XLPS((Ki), (data), (data)); \
} while (0)

/*
 * MMX implementation of the g() step of gosthash2012.
 *
 *   h - 512-bit chaining value; read as input and overwritten with the
 *       result.
 *   N - 512-bit value mixed into h to derive the initial key.
 *   m - 512-bit message block.
 *
 * Computes h = E(XLPS(h, N), m) ^ h ^ m, where E() runs eleven ROUND()
 * iterations with the constants C[0..10] and a final key update with
 * C[11] (C is declared outside this file).
 */
void g_mmx(union uint512_u *h, const union uint512_u * RESTRICT N,
           const union uint512_u * RESTRICT m)
{
    union uint512_u key, state;
    unsigned int rnd;

    /* Initial key: XLPS of h and N. */
    XLPS(h, N, (&key));

    /* Starting E(): the first state is XLPS of the key and the message. */
    XLPS((&key), m, (&state));

    for (rnd = 0; rnd < 11; rnd++) {
        ROUND(rnd, (&key), (&state));
    }

    /* Last key update, then plain XOR of the key into the state. */
    XLPS((&key), (&C[11]), (&key));
    X((&key), (&state), (&state));
    /* E() done */

    /* Feed-forward: h = E ^ h ^ m. */
    X((&state), h, (&state));
    X((&state), m, h);

    /* Leave MMX state so later x87 floating-point code is not affected. */
    _mm_empty();
}
#endif /* __GOST3411_HAS_MMX__ */

0 comments on commit 36e84c3

Please sign in to comment.