Skip to content

Commit

Permalink
avoid regression when run length is small
Browse files Browse the repository at this point in the history
  • Loading branch information
stdpain committed Jul 20, 2024
1 parent e2e0cd0 commit 4ec1eaf
Showing 1 changed file with 27 additions and 19 deletions.
46 changes: 27 additions & 19 deletions src/containers/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -1017,25 +1017,33 @@ int _avx2_run_container_to_uint32_array(void *vout, const run_container_t *cont,
for (int i = 0; i < cont->n_runs; ++i) {
uint32_t run_start = base + cont->runs[i].value;
uint16_t le = cont->runs[i].length;
int j = 0;
__m256i run_start_v = _mm256_set1_epi32(run_start);
// [8,8,8,8....]
__m256i inc = _mm256_set1_epi32(8);
// used to generate the sequence:
// [0, 1, 2, 3...], [8, 9, 10,...]
__m256i delta = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
for (j = 0; j + 8 <= le; j += 8) {
__m256i val_v = _mm256_add_epi32(run_start_v, delta);
_mm256_storeu_si256((__m256i *)(out + outpos), val_v);
delta = _mm256_add_epi32(inc, delta);
outpos += 8;
}

for (; j <= le; ++j) {
uint32_t val = run_start + j;
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
outpos++;
if (__builtin_expect(le < 8, 1)) {
for (int j = 0; j <= le; ++j) {
uint32_t val = run_start + j;
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
outpos++;
}
} else {
int j = 0;
__m256i run_start_v = _mm256_set1_epi32(run_start);
// [8,8,8,8....]
__m256i inc = _mm256_set1_epi32(8);
// used to generate the sequence:
// [0, 1, 2, 3...], [8, 9, 10,...]
__m256i delta = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
for (j = 0; j + 8 <= le; j += 8) {
__m256i val_v = _mm256_add_epi32(run_start_v, delta);
_mm256_storeu_si256((__m256i *)(out + outpos), val_v);
delta = _mm256_add_epi32(inc, delta);
outpos += 8;
}
for (; j <= le; ++j) {
uint32_t val = run_start + j;
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
outpos++;
}
}
}
return outpos;
Expand Down

0 comments on commit 4ec1eaf

Please sign in to comment.