diff --git a/include/bucket_list_hash_table.cuh b/include/bucket_list_hash_table.cuh index b451bdf..a8ccaa2 100644 --- a/include/bucket_list_hash_table.cuh +++ b/include/bucket_list_hash_table.cuh @@ -300,10 +300,12 @@ public: } else { - append_status = value_store_.append( - *(handles[btid]), - values_in[block_offset + btid], - max_values_per_key_); + if(block_offset + btid < num_in){ + append_status = value_store_.append( + *(handles[btid]), + values_in[block_offset + btid], + max_values_per_key_); + } } if(append_status.has_any()) @@ -311,15 +313,19 @@ public: device_join_status(append_status); } - // TODO not zero-cost - if(!std::is_same< - StatusHandler, - status_handlers::ReturnNothing>::value) - { - StatusHandler::handle( - status[btid]+append_status, - status_out, - block_offset + btid); + if(block_offset + btid < num_in){ + + // TODO not zero-cost + if(!std::is_same< + StatusHandler, + status_handlers::ReturnNothing>::value) + { + StatusHandler::handle( + status[btid]+append_status, + status_out, + block_offset + btid); + } + } } } @@ -449,7 +455,7 @@ public: } kernels::retrieve - <<>> + <<>> ( keys_in, num_in, @@ -465,7 +471,7 @@ public: if(status_out != nullptr) { helpers::lambda_kernel - <<>> + <<>> ([=, *this] DEVICEQUALIFIER { const index_type tid = helpers::global_thread_id(); @@ -793,7 +799,7 @@ public: cudaMemsetAsync(tmp, 0, sizeof(index_type), stream); kernels::num_values - <<>> + <<>> (keys_in, num_in, tmp, num_per_key_out, *this, probing_length, status_out); cudaMemcpyAsync(&num_out, tmp, sizeof(index_type), D2H, stream);