Skip to content

Commit

Permalink
Hacking.
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Aug 25, 2016
1 parent ee32efc commit 88eddb0
Show file tree
Hide file tree
Showing 3 changed files with 2,116 additions and 5,712 deletions.
8 changes: 4 additions & 4 deletions scripts/avx512dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,16 @@ def plurial(number):
if(firstshift + bit <> 32):
wfirst = maskstr.format(wfirst)
print(" wout = {0}; // 512-bit word to be output".format(wfirst));
print(" _mm512_storeu_si512(out + {0},_mm512_i32gather_epi64(dictionary,_mm512_castsi512_si256(wout), 8)); // load from dictionary and store".format(2*j))
print(" _mm512_storeu_si512(out + {0},_mm512_i32gather_epi64(dictionary,_mm512_extracti64x4_epi64(wout,1), 8)); // load from dictionary and store".format(2*j+1))
print(" _mm512_storeu_si512(out + {0},_mm512_i32gather_epi64(_mm512_castsi512_si256(wout),dictionary, 8)); // load from dictionary and store".format(2*j))
print(" _mm512_storeu_si512(out + {0},_mm512_i32gather_epi64(_mm512_extracti64x4_epi64(wout,1),dictionary, 8)); // load from dictionary and store".format(2*j+1))
else:
secondshift = (32-firstshift)
wsecond = "_mm512_slli_epi32( w{0} , {1} ) ".format((firstword+1)%2,secondshift)
wfirstorsecond = " _mm512_or_si512 ({0},{1}) ".format(wfirst,wsecond)
wfirstorsecond = maskstr.format(wfirstorsecond)
print(" wout = {0}; // 512-bit word to be output".format(wfirstorsecond));
print(" _mm512_storeu_si512(out + {0},_mm512_i32gather_epi64(dictionary,_mm512_castsi512_si256(wout), 8)); // load from dictionary and store".format(2*j))
print(" _mm512_storeu_si512(out + {0},_mm512_i32gather_epi64(dictionary,_mm512_extracti64x4_epi64(wout,1), 8)); // load from dictionary and store".format(2*j+1))
print(" _mm512_storeu_si512(out + {0},_mm512_i32gather_epi64(_mm512_castsi512_si256(wout),dictionary, 8)); // load from dictionary and store".format(2*j))
print(" _mm512_storeu_si512(out + {0},_mm512_i32gather_epi64(_mm512_extracti64x4_epi64(wout,1),dictionary, 8)); // load from dictionary and store".format(2*j+1))
print("}");
print("")

Expand Down
15 changes: 9 additions & 6 deletions src/avx512bpacking.h
Original file line number Diff line number Diff line change
Expand Up @@ -7770,18 +7770,21 @@ static avx512unpackblockfnc avx512funcUnpackArr[] = {


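/* processes "number" / 512 blocks: for each block, reads 512 values from "in"
 * and writes "bit" 512-bit vectors to "out"; values beyond the last full
 * block of 512 are not processed */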
void avx512pack(const uint32_t * in,__m512i * out, const uint32_t bit) {
avx512funcPackMaskArr[bit](in,out);
void avx512pack(const uint32_t * in,__m512i * out, const uint32_t number, const uint32_t bit) {
for(uint32_t i = 0; i < number / 512; ++i)
avx512funcPackMaskArr[bit](in + i * 512,out + i * bit);
}

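/* same block layout as avx512pack, but dispatches through avx512funcPackArr:
 * for each of the "number" / 512 blocks, reads 512 values from "in" and writes
 * "bit" 512-bit vectors to "out"; values beyond the last full block of 512 are
 * not processed */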
void avx512packwithoutmask(const uint32_t * in,__m512i * out, const uint32_t bit) {
avx512funcPackArr[bit](in,out);
void avx512packwithoutmask(const uint32_t * in,__m512i * out, const uint32_t number, const uint32_t bit) {
for(uint32_t i = 0; i < number / 512; ++i)
avx512funcPackArr[bit](in + i * 512,out + i * bit);
}

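/* processes "number" / 512 blocks: for each block, reads "bit" 512-bit vectors
 * from "in" and writes 512 values to "out"; "number" is rounded down to a
 * multiple of 512 */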
void avx512unpack(const __m512i * in,uint32_t * out, const uint32_t bit) {
avx512funcUnpackArr[bit](in,out);
void avx512unpack(const __m512i * in,uint32_t * out, const uint32_t number, const uint32_t bit) {
for(uint32_t i = 0; i < number / 512; ++i)
avx512funcUnpackArr[bit](in + i * bit,out + i * 512);
}

#endif
Loading

0 comments on commit 88eddb0

Please sign in to comment.