From c7b4e6dd83bc2349f682927b7b239631389869ff Mon Sep 17 00:00:00 2001
From: "Diego F. Aranha"
Date: Wed, 12 Oct 2022 22:13:09 +0200
Subject: [PATCH 1/3] Fully reduce sampled ternary vectors.

---
 include/nfl/core.hpp | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/include/nfl/core.hpp b/include/nfl/core.hpp
index 57ecb97..512e8d9 100644
--- a/include/nfl/core.hpp
+++ b/include/nfl/core.hpp
@@ -243,7 +243,7 @@ void poly<T, Degree, NbModuli>::set(non_uniform const& mode) {
       }
     }
   }
-  } 
+  }
   else {
     for (unsigned int i = 0; i < degree; i++) {
@@ -303,10 +303,10 @@ void poly<T, Degree, NbModuli>::set(gaussian<uint16_t, T, 2> const& mode) {
   mode.fg_prng->getNoise((value_type *)rnd, degree);
   if (amplifier != 1)
     for (unsigned int i = 0; i < degree; i++) rnd[i]*= amplifier;
-  for (size_t cm = 0; cm < nmoduli; cm++) 
+  for (size_t cm = 0; cm < nmoduli; cm++)
   {
     for (size_t i = 0 ; i < degree; i++)
-    { 
+    {
       if(rnd[i]<0)
         _data[degree*cm+i] = get_modulus(cm) + rnd[i];
       else
@@ -338,8 +338,12 @@ void poly<T, Degree, NbModuli>::set(ZO_dist const& mode) {
   for (size_t cm = 0; cm < NbModuli; ++cm) {
     const T pm = params<T>::P[cm] - 1u; /* sample {-1, 0, 1} */
-    for (size_t i = 0; i < Degree; ++i)
-      *ptr++ = rnd[i] <= mode.rho ? pm + (rnd[i] & 2) : 0u;
+    for (size_t i = 0; i < Degree; ++i, ptr++) {
+      *ptr = rnd[i] <= mode.rho ? (pm + (rnd[i] & 2)) : 0u;
+      if (*ptr > params<T>::P[cm]) {
+        *ptr -= params<T>::P[cm];
+      }
+    }
   }
 }
@@ -357,7 +361,7 @@ void poly<T, Degree, NbModuli>::set(hwt_dist const& mode) {
   auto rnd_end = rnd.end();
   auto rnd_ptr = rnd_end;
   /* Reservoir Sampling: uniformly select hwt coefficients. */
-  for (size_t k = mode.hwt; k < degree; ++k) { 
+  for (size_t k = mode.hwt; k < degree; ++k) {
     size_t pos = 0;
     size_t reject_sample = std::numeric_limits<T>::max() / k;
@@ -384,8 +388,10 @@ void poly<T, Degree, NbModuli>::set(hwt_dist const& mode) {
   for (size_t cm = 0, offset = 0; cm < NbModuli; ++cm, offset += degree) {
     const T pm = params<T>::P[cm] - 1u;
     rnd_ptr = rnd.begin();
-    for (size_t pos : hitted)
-      _data[pos + offset] = pm + ((*rnd_ptr++) & 2U); // {-1, 1}
+    for (size_t pos : hitted) {
+      _data[pos + offset] = ((*rnd_ptr++) & 2U); // {-1, 1}
+      _data[pos + offset] = (_data[pos + offset] > 0 ? 1 : pm);
+    }
   }
   std::memset(hitted.data(), 0x0, hitted.size() * sizeof(size_t)); // erase from memory
 }
@@ -400,9 +406,9 @@ std::ostream& operator<<(std::ostream& outs, poly<T, Degree, NbModuli> const& p) {
   bool first = true;
   std::string term;
-  if (typeid(T) == typeid(uint64_t)) term = "ULL"; 
-  else if (typeid(T) == typeid(uint32_t)) term = "UL"; 
-  else term = "U"; 
+  if (typeid(T) == typeid(uint64_t)) term = "ULL";
+  else if (typeid(T) == typeid(uint32_t)) term = "UL";
+  else term = "U";
 
   outs << "{ ";
   for(auto v : p)
@@ -688,5 +694,3 @@ template void poly
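Note on [PATCH 1/3]: the hunks above enforce the invariant that every sampled ternary coefficient in {-1, 0, 1} is stored fully reduced in [0, p). With pm = p - 1, a sampled +1 is first computed as pm + 2 = p + 1, one conditional subtraction short of reduced form. A minimal standalone sketch of that logic (illustrative only; encode_ternary and the test modulus are not NFLlib names):

#include <cassert>
#include <cstdint>

// Encode t in {-1, 0, 1} as a fully reduced residue mod p,
// mirroring the patched ZO_dist logic (pm = p - 1).
static uint64_t encode_ternary(int t, uint64_t p) {
  uint64_t r = (t == 0) ? 0 : (p - 1) + (t > 0 ? 2 : 0);
  if (r > p) r -= p;  // the conditional reduction the patch adds
  return r;           // now always in [0, p)
}

int main() {
  const uint64_t p = 1073479681;  // any odd modulus works for this check
  assert(encode_ternary(-1, p) == p - 1);
  assert(encode_ternary( 0, p) == 0);
  assert(encode_ternary(+1, p) == 1);
  return 0;
}

Without the reduction, +1 would be stored as p + 1, outside the [0, p) range that modular arithmetic on the coefficient vector presumably assumes.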
From: "Diego F. Aranha"
Date: Thu, 13 Oct 2022 11:24:29 +0200
Subject: [PATCH 2/3] Extend NFLlib PRNG.

---
 include/nfl/prng/fastrandombytes.h |  2 ++
 lib/prng/fastrandombytes.cpp       | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/nfl/prng/fastrandombytes.h b/include/nfl/prng/fastrandombytes.h
index 38378a1..84d489e 100644
--- a/include/nfl/prng/fastrandombytes.h
+++ b/include/nfl/prng/fastrandombytes.h
@@ -8,6 +8,8 @@
 #define FASTRANDOMBYTES_H
 
 namespace nfl {
+void fastrandombytes_seed(unsigned char *s, unsigned long long slen);
+void fastrandombytes_reseed();
 void fastrandombytes(unsigned char *r, unsigned long long rlen);
 }
 
diff --git a/lib/prng/fastrandombytes.cpp b/lib/prng/fastrandombytes.cpp
index 9b06c93..6fad14f 100644
--- a/lib/prng/fastrandombytes.cpp
+++ b/lib/prng/fastrandombytes.cpp
@@ -5,6 +5,7 @@
  */
 
 #include
+#include <string.h>
 #include
 #include "nfl/prng/crypto_stream_salsa20.h"
 #include "nfl/prng/randombytes.h"
@@ -18,6 +19,17 @@ static int init = 0;
 static unsigned char key[crypto_stream_salsa20_KEYBYTES];
 static unsigned char nonce[crypto_stream_salsa20_NONCEBYTES] = {0};
 
+void fastrandombytes_seed(unsigned char *s, unsigned long long slen) {
+  memcpy(key, s, slen);
+  memset(key + slen, 0, crypto_stream_salsa20_KEYBYTES - slen);
+  memset(nonce, 0, crypto_stream_salsa20_NONCEBYTES);
+  init = -1;
+}
+
+void fastrandombytes_reseed() {
+  init = 0;
+}
+
 void fastrandombytes(unsigned char *r, unsigned long long rlen) {
   unsigned long long n = 0;
   int i;
@@ -32,4 +44,4 @@ void fastrandombytes(unsigned char *r, unsigned long long rlen) {
     n++;
   for (i = 0; i < crypto_stream_salsa20_NONCEBYTES; i++) nonce[i] = (n >> 8 * i) & 0xff;
 }
-}
\ No newline at end of file
+}
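Note on [PATCH 2/3]: fastrandombytes_seed() fixes the Salsa20 key (zero-padding seeds shorter than 32 bytes) and resets the nonce, making the stream reproducible; init = -1 marks the seeded state so fastrandombytes() skips its automatic re-key, while fastrandombytes_reseed() clears the flag so the next call re-keys from randombytes(). A usage sketch against this interface (my example, not part of the patch):

#include "nfl/prng/fastrandombytes.h"
#include <cstdio>
#include <cstring>

int main() {
  unsigned char seed[32] = {0x42};  // remaining bytes are zero
  unsigned char a[16], b[16];

  // Deterministic mode: the same seed yields the same stream.
  nfl::fastrandombytes_seed(seed, sizeof(seed));
  nfl::fastrandombytes(a, sizeof(a));
  nfl::fastrandombytes_seed(seed, sizeof(seed));
  nfl::fastrandombytes(b, sizeof(b));
  std::printf("reproducible: %s\n",
              std::memcmp(a, b, sizeof(a)) == 0 ? "yes" : "no");

  // Back to fresh entropy: the next call re-keys internally.
  nfl::fastrandombytes_reseed();
  nfl::fastrandombytes(a, sizeof(a));
  return 0;
}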
From b96c9fd319bd5067907c70883d2f4ef392d029cd Mon Sep 17 00:00:00 2001
From: "Diego F. Aranha"
Date: Sat, 15 Oct 2022 00:24:53 +0200
Subject: [PATCH 3/3] Replace PRNG with AES256 generator.

---
 include/nfl/prng/fastrandombytes.h         |    2 +-
 lib/prng/fastrandombytes.cpp               |  161 +-
 .../nfl_crypto_stream_salsa20_amd64_xmm6.s | 4823 -----------------
 3 files changed, 132 insertions(+), 4854 deletions(-)
 delete mode 100644 lib/prng/nfl_crypto_stream_salsa20_amd64_xmm6.s

diff --git a/include/nfl/prng/fastrandombytes.h b/include/nfl/prng/fastrandombytes.h
index 84d489e..ced0603 100644
--- a/include/nfl/prng/fastrandombytes.h
+++ b/include/nfl/prng/fastrandombytes.h
@@ -8,7 +8,7 @@
 #define FASTRANDOMBYTES_H
 
 namespace nfl {
-void fastrandombytes_seed(unsigned char *s, unsigned long long slen);
+void fastrandombytes_seed(const unsigned char *s);
 void fastrandombytes_reseed();
 void fastrandombytes(unsigned char *r, unsigned long long rlen);
 }
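Note on the header change: seeding now takes a fixed AES256_KEY_LENGTH (32-byte) buffer and no length argument. A hypothetical call-site migration (names illustrative, not from the patch):

#include "nfl/prng/fastrandombytes.h"

int main() {
  unsigned char key32[32] = {0};      // exactly 32 bytes; fill from a real entropy source
  // old interface: nfl::fastrandombytes_seed(key32, sizeof(key32));
  nfl::fastrandombytes_seed(key32);   // new interface: length fixed by the AES-256 key size
  return 0;
}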
diff --git a/lib/prng/fastrandombytes.cpp b/lib/prng/fastrandombytes.cpp
index 6fad14f..a83f740 100644
--- a/lib/prng/fastrandombytes.cpp
+++ b/lib/prng/fastrandombytes.cpp
@@ -1,28 +1,93 @@
-/*
- * File: lattisigns512-20130329/fastrandombytes.c
- * Author: Tim Güneysu, Tobias Oder, Thomas Pöppelmann, Peter Schwabe
- * Public Domain
+/* Adapted from Intel® Advanced Encryption Standard (Intel® AES) Instructions Set - Rev 3.01
+ * https://software.intel.com/sites/default/files/article/165683/aes-wp-2012-09-22-v01.pdf
  */
-#include
+#include "fastrandombytes.h"
+#include "randombytes.h"
 #include <string.h>
-#include
-#include "nfl/prng/crypto_stream_salsa20.h"
-#include "nfl/prng/randombytes.h"
+#include <wmmintrin.h>
 
-namespace nfl {
+static __m128i round_key[15];
+static __m128i iv;
+static const __m128i ONE = {1, 0};
+
+#define AES256_KEY_LENGTH 32
 
-static size_t constexpr crypto_stream_salsa20_KEYBYTES = 32;
-static size_t constexpr crypto_stream_salsa20_NONCEBYTES = 8;
+namespace nfl {
 
 static int init = 0;
-static unsigned char key[crypto_stream_salsa20_KEYBYTES];
-static unsigned char nonce[crypto_stream_salsa20_NONCEBYTES] = {0};
 
-void fastrandombytes_seed(unsigned char *s, unsigned long long slen) {
-  memcpy(key, s, slen);
-  memset(key + slen, 0, crypto_stream_salsa20_KEYBYTES - slen);
-  memset(nonce, 0, crypto_stream_salsa20_NONCEBYTES);
+static inline void KEY_256_ASSIST_1(__m128i* temp1, __m128i * temp2)
+{
+  __m128i temp4;
+  *temp2 = _mm_shuffle_epi32(*temp2, 0xff);
+  temp4 = _mm_slli_si128(*temp1, 0x4);
+  *temp1 = _mm_xor_si128(*temp1, temp4);
+  temp4 = _mm_slli_si128(temp4, 0x4);
+  *temp1 = _mm_xor_si128(*temp1, temp4);
+  temp4 = _mm_slli_si128(temp4, 0x4);
+  *temp1 = _mm_xor_si128(*temp1, temp4);
+  *temp1 = _mm_xor_si128(*temp1, *temp2);
+}
+
+static inline void KEY_256_ASSIST_2(__m128i* temp1, __m128i * temp3)
+{
+  __m128i temp2,temp4;
+  temp4 = _mm_aeskeygenassist_si128(*temp1, 0x0);
+  temp2 = _mm_shuffle_epi32(temp4, 0xaa);
+  temp4 = _mm_slli_si128(*temp3, 0x4);
+  *temp3 = _mm_xor_si128(*temp3, temp4);
+  temp4 = _mm_slli_si128(temp4, 0x4);
+  *temp3 = _mm_xor_si128(*temp3, temp4);
+  temp4 = _mm_slli_si128(temp4, 0x4);
+  *temp3 = _mm_xor_si128(*temp3, temp4);
+  *temp3 = _mm_xor_si128(*temp3, temp2);
+}
+
+/* round_key <-- aes256_key_expansion(randomness), iv <-- 0 */
+void fastrandombytes_seed(const unsigned char *randomness)
+{
+  __m128i temp1, temp2, temp3;
+
+  temp1 = _mm_loadu_si128((__m128i*)randomness);
+  temp3 = _mm_loadu_si128((__m128i*)(randomness+16));
+  round_key[0] = temp1;
+  round_key[1] = temp3;
+  temp2 = _mm_aeskeygenassist_si128(temp3,0x01);
+  KEY_256_ASSIST_1(&temp1, &temp2);
+  round_key[2]=temp1;
+  KEY_256_ASSIST_2(&temp1, &temp3);
+  round_key[3]=temp3;
+  temp2 = _mm_aeskeygenassist_si128(temp3,0x02);
+  KEY_256_ASSIST_1(&temp1, &temp2);
+  round_key[4]=temp1;
+  KEY_256_ASSIST_2(&temp1, &temp3);
+  round_key[5]=temp3;
+  temp2 = _mm_aeskeygenassist_si128(temp3,0x04);
+  KEY_256_ASSIST_1(&temp1, &temp2);
+  round_key[6]=temp1;
+  KEY_256_ASSIST_2(&temp1, &temp3);
+  round_key[7]=temp3;
+  temp2 = _mm_aeskeygenassist_si128(temp3,0x08);
+  KEY_256_ASSIST_1(&temp1, &temp2);
+  round_key[8]=temp1;
+  KEY_256_ASSIST_2(&temp1, &temp3);
+  round_key[9]=temp3;
+  temp2 = _mm_aeskeygenassist_si128(temp3,0x10);
+  KEY_256_ASSIST_1(&temp1, &temp2);
+  round_key[10]=temp1;
+  KEY_256_ASSIST_2(&temp1, &temp3);
+  round_key[11]=temp3;
+  temp2 = _mm_aeskeygenassist_si128(temp3,0x20);
+  KEY_256_ASSIST_1(&temp1, &temp2);
+  round_key[12]=temp1;
+  KEY_256_ASSIST_2(&temp1, &temp3);
+  round_key[13]=temp3;
+  temp2 = _mm_aeskeygenassist_si128(temp3,0x40);
+  KEY_256_ASSIST_1(&temp1, &temp2);
+  round_key[14]=temp1;
+
+  iv = _mm_setzero_si128();
   init = -1;
 }
 
@@ -30,18 +95,54 @@ void fastrandombytes_reseed() {
   init = 0;
 }
 
-void fastrandombytes(unsigned char *r, unsigned long long rlen) {
-  unsigned long long n = 0;
-  int i;
-  if (!init) {
-    randombytes(key, crypto_stream_salsa20_KEYBYTES);
-    init = 1;
-  }
-  nfl_crypto_stream_salsa20_amd64_xmm6(r, rlen, nonce, key);
-
-  // Increase 64-bit counter (nonce)
-  for (i = 0; i < crypto_stream_salsa20_NONCEBYTES; i++) n ^= ((unsigned long long)nonce[i]) << 8 * i;
-  n++;
-  for (i = 0; i < crypto_stream_salsa20_NONCEBYTES; i++) nonce[i] = (n >> 8 * i) & 0xff;
+static inline void AES_ctr_round(unsigned char *out)
+{
+  __m128i tmp;
+
+  tmp = _mm_xor_si128(iv,round_key[0]);
+  tmp = _mm_aesenc_si128(tmp,round_key[1]);
+  tmp = _mm_aesenc_si128(tmp,round_key[2]);
+  tmp = _mm_aesenc_si128(tmp,round_key[3]);
+  tmp = _mm_aesenc_si128(tmp,round_key[4]);
+  tmp = _mm_aesenc_si128(tmp,round_key[5]);
+  tmp = _mm_aesenc_si128(tmp,round_key[6]);
+  tmp = _mm_aesenc_si128(tmp,round_key[7]);
+  tmp = _mm_aesenc_si128(tmp,round_key[8]);
+  tmp = _mm_aesenc_si128(tmp,round_key[9]);
+  tmp = _mm_aesenc_si128(tmp,round_key[10]);
+  tmp = _mm_aesenc_si128(tmp,round_key[11]);
+  tmp = _mm_aesenc_si128(tmp,round_key[12]);
+  tmp = _mm_aesenc_si128(tmp,round_key[13]);
+  tmp = _mm_aesenclast_si128(tmp,round_key[14]);
+  _mm_storeu_si128((__m128i*)out,tmp);
+
+  iv = _mm_add_epi32(iv, ONE);
+}
+
+/* r <-- aes256_ctr(round_key, iv, rlen) */
+void fastrandombytes(unsigned char *r, unsigned long long rlen)
+{
+  unsigned char ct[16];
+  unsigned long long num_of_blocks = rlen >> 4;
+  unsigned long long i;
+
+  if (!init) {
+    unsigned char seed[AES256_KEY_LENGTH];
+    randombytes(seed, AES256_KEY_LENGTH);
+    fastrandombytes_seed(seed);
+    init = 1;
+  }
+
+  for (i = 0; i < num_of_blocks; i++)
+  {
+    AES_ctr_round(r + (i << 4));
+  }
+
+  if (rlen & 0x0f)
+  {
+    AES_ctr_round(ct);
+
+    memcpy(r + (i << 4), ct, rlen & 0x0f);
+  }
 }
 
 }
diff --git a/lib/prng/nfl_crypto_stream_salsa20_amd64_xmm6.s b/lib/prng/nfl_crypto_stream_salsa20_amd64_xmm6.s
deleted file mode 100644
index eb91973..0000000
--- a/lib/prng/nfl_crypto_stream_salsa20_amd64_xmm6.s
+++ /dev/null
@@ -1,4823 +0,0 @@
[4823 removed lines of qhasm-generated amd64/XMM6 Salsa20 assembly (nfl_crypto_stream_salsa20_amd64_xmm6) omitted; the patch deletes the file in its entirety.]
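Note on [PATCH 3/3]: the replacement generator is AES-256 in counter mode. fastrandombytes_seed() expands the 32-byte seed into 15 round keys and zeroes the 128-bit counter; each AES_ctr_round() encrypts the counter into 16 output bytes and then increments it with _mm_add_epi32(iv, ONE), which bumps only the low 32-bit lane, so the counter wraps after 2^32 blocks; a trailing partial block is generated into a scratch buffer and truncated. A quick determinism check (my test, not part of the patch; assumes an AES-NI-capable x86-64 build):

#include "nfl/prng/fastrandombytes.h"
#include <cstdio>
#include <cstring>

int main() {
  unsigned char seed[32];  // AES256_KEY_LENGTH bytes, loaded as two 16-byte halves
  for (int i = 0; i < 32; i++) seed[i] = (unsigned char)i;

  unsigned char a[40], b[40];  // two full 16-byte blocks plus one partial block

  nfl::fastrandombytes_seed(seed);   // round keys from key expansion, counter = 0
  nfl::fastrandombytes(a, sizeof(a));

  nfl::fastrandombytes_seed(seed);   // same seed: same key schedule and counter
  nfl::fastrandombytes(b, sizeof(b));

  std::printf("deterministic: %s\n",
              std::memcmp(a, b, sizeof(a)) == 0 ? "yes" : "no");
  return 0;
}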