Skip to content

Commit

Permalink
refs #6: Refactor all occurrences of ShiftedInt to dev_offset_t.
Browse files Browse the repository at this point in the history
 * This makes it easier to run experiments that compare performance across different offset sizes.

 * Confirmed a performance drop when dev_offset_t is ShiftedInt<uint32_t, 0>.
  • Loading branch information
achimnol committed Jan 22, 2016
1 parent f041e78 commit 07b67b8
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 8 deletions.
4 changes: 4 additions & 0 deletions include/nba/core/shiftedint.hh
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ private:
}
};

// Changing this to ShiftedInt<uint32_t, 0> or uint32_t reproduces exactly
// the performance drop observed when we first changed offset types.
typedef ShiftedInt<uint16_t, 2> dev_offset_t;

} /* endns(nba) */

#endif /* __NBA_CORE_SHIFTEDINT_HH__ */
Expand Down
4 changes: 2 additions & 2 deletions include/nba/engines/cuda/compat.hh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ struct datablock_batch_info {
uint32_t item_count_out;
uint16_t *item_sizes_in;
uint16_t *item_sizes_out;
nba::ShiftedInt<uint16_t, 2> *item_offsets_in;
nba::ShiftedInt<uint16_t, 2> *item_offsets_out;
nba::dev_offset_t *item_offsets_in;
nba::dev_offset_t *item_offsets_out;
}; // __cuda_aligned

struct datablock_kernel_arg {
Expand Down
8 changes: 4 additions & 4 deletions include/nba/framework/datablock.hh
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@ struct item_size_info {
uint16_t size;
uint16_t sizes[NBA_MAX_COMP_BATCH_SIZE * 12];
};
ShiftedInt<uint16_t, 2>[NBA_MAX_COMP_BATCH_SIZE * 12];
dev_offset_t offsets[NBA_MAX_COMP_BATCH_SIZE * 12];
};
#else
struct item_size_info {
union {
uint16_t size;
uint16_t sizes[NBA_MAX_COMP_BATCH_SIZE * 96];
};
ShiftedInt<uint16_t, 2> offsets[NBA_MAX_COMP_BATCH_SIZE * 96];
dev_offset_t offsets[NBA_MAX_COMP_BATCH_SIZE * 96];
};
#endif

Expand Down Expand Up @@ -126,8 +126,8 @@ struct datablock_batch_info {
uint32_t item_count_out;
uint16_t *item_sizes_in;
uint16_t *item_sizes_out;
ShiftedInt<uint16_t, 2> *item_offsets_in;
ShiftedInt<uint16_t, 2> *item_offsets_out;
dev_offset_t *item_offsets_in;
dev_offset_t *item_offsets_out;
}; // __cuda_aligned

/**
Expand Down
4 changes: 2 additions & 2 deletions src/lib/offloadtask.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,10 @@ bool OffloadTask::copy_h2d()
dbarg_h->batches[b].item_sizes_out = (uint16_t *)
((char *) t->aligned_item_sizes_d.ptr
+ (uintptr_t) offsetof(struct item_size_info, sizes));
dbarg_h->batches[b].item_offsets_in = (ShiftedInt<uint16_t, 2> *)
dbarg_h->batches[b].item_offsets_in = (dev_offset_t *)
((char *) t->aligned_item_sizes_d.ptr
+ (uintptr_t) offsetof(struct item_size_info, offsets));
dbarg_h->batches[b].item_offsets_out = (ShiftedInt<uint16_t, 2> *)
dbarg_h->batches[b].item_offsets_out = (dev_offset_t *)
((char *) t->aligned_item_sizes_d.ptr
+ (uintptr_t) offsetof(struct item_size_info, offsets));
} else {
Expand Down

0 comments on commit 07b67b8

Please sign in to comment.