Skip to content

Commit

Permalink
Finally fix IPsec GPU-only memory corruption bug.
Browse files Browse the repository at this point in the history
 * BIG THANKS to Keunhong for implementing void PMD and "slow" RX mode
   to emulate non-full RX computation batches and narrow down the scope.

 * Wrong item_idx vs. pkt_idx ........ orz

 * Wrong check of offset/length pairs.
   offset can be zero but length shouldn't be zero (if so, must be skipped).
  • Loading branch information
achimnol committed Mar 3, 2016
1 parent 56f1437 commit 584ead2
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
10 changes: 5 additions & 5 deletions elements/ipsec/IPsecAES_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -710,10 +710,10 @@ __global__ void AES_ctr_encrypt_chunk_SharedMem_5(
[item_idx];
const int pkt_idx = cur_block_info.pkt_idx;
const int block_idx_local = cur_block_info.block_idx;
const uintptr_t offset = (uintptr_t) db_enc_payloads->batches[batch_idx].item_offsets_in[item_idx].as_value<uintptr_t>();
const uintptr_t length = (uintptr_t) db_enc_payloads->batches[batch_idx].item_sizes_in[item_idx];
const uintptr_t offset = (uintptr_t) db_enc_payloads->batches[batch_idx].item_offsets_in[pkt_idx].as_value<uintptr_t>();
const uintptr_t length = (uintptr_t) db_enc_payloads->batches[batch_idx].item_sizes_in[pkt_idx];

if (cur_block_info.magic == 85739 && pkt_idx < 64 && offset != 0 && length != 0) {
if (cur_block_info.magic == 85739 && pkt_idx < 64 && length != 0) {
flow_id = ((uint64_t *) db_flow_ids->batches[batch_idx].buffer_bases_in)[pkt_idx];
if (flow_id != 65536)
assert(flow_id < 1024);
Expand All @@ -739,9 +739,9 @@ __global__ void AES_ctr_encrypt_chunk_SharedMem_5(

__syncthreads();

if (flow_id != 65536) {
if (flow_id != 65536 && length != 0) {
assert(flow_id < 1024);
assert(pkt_idx < 64);
assert(length != 0);

const uint8_t *const aes_key = flows[flow_id].aes_key;
uint8_t *iv = ((uint8_t *) db_iv->batches[batch_idx].buffer_bases_in
Expand Down
8 changes: 6 additions & 2 deletions src/lib/coprocessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ static void coproc_task_input_cb(struct ev_loop *loop, struct ev_async *watcher,
task->coproc_ctx = ctx;
task->copy_h2d();
task->execute();
//task->cctx->sync(); // for DEBUG
#ifdef DEBUG_OFFLOAD
task->cctx->sync();
#endif
/* We separate d2h copy step since CUDA implicitly synchronizes
* kernel executions. See more details at:
* http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#implicit-synchronization */
Expand Down Expand Up @@ -90,7 +92,9 @@ static void coproc_task_d2h_cb(struct ev_loop *loop, struct ev_async *watcher, i
ctx->d2h_pending_queue->pop_front();
if (task->poll_kernel_finished()) {
task->copy_d2h();
//task->cctx->sync(); // for DEBUG
#ifdef DEBUG_OFFLOAD
task->cctx->sync();
#endif
ctx->task_done_queue->push_back(task);
if (ctx->task_done_queue->size() >= NBA_MAX_KERNEL_OVERLAP || !ev_is_pending(ctx->task_input_watcher))
ev_feed_event(loop, ctx->task_done_watcher, EV_ASYNC);
Expand Down
2 changes: 1 addition & 1 deletion src/lib/offloadtask.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ OffloadTask::~OffloadTask()
{
}

#ifdef DEBUG
#ifdef DEBUG_OFFLOAD
#define _debug_print_inb(tag, batch, dbid) { \
size_t end = cctx->get_input_size(io_base); \
size_t len = end - last_input_size; \
Expand Down

0 comments on commit 584ead2

Please sign in to comment.