Skip to content

Commit

Permalink
refs #6: Prevent potential symbol conflicts
Browse files Browse the repository at this point in the history
 * When using heterogeneous GPUs, there are multiple occurrences of CUDA
   kernels for different PTX assembly versions.  I don't know how
   exactly nvcc treats "static const" variables when generting multiple
   cubin binaries, but we can just choose NOT to depend on such
   behaviour for device-side datablock indices.
  • Loading branch information
achimnol committed May 26, 2015
1 parent fcef4fb commit 1fad3ee
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 20 deletions.
8 changes: 4 additions & 4 deletions elements/ip/IPlookup_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
extern "C" {

/* The index is given by the order in get_used_datablocks(). */
static const __device__ int dbid_ipv4_dest_addrs = 0;
static const __device__ int dbid_ipv4_lookup_results = 1;
#define dbid_ipv4_dest_addrs_d (0)
#define dbid_ipv4_lookup_results_d (1)

/* The GPU kernel. */
__global__ void ipv4_route_lookup_cuda(
Expand All @@ -37,8 +37,8 @@ __global__ void ipv4_route_lookup_cuda(
if (idx < count) {
uint16_t batch_idx = batch_ids[idx];
uint16_t item_idx = item_ids[idx];
struct datablock_kernel_arg *db_dest_addrs = &datablocks[dbid_ipv4_dest_addrs];
struct datablock_kernel_arg *db_results = &datablocks[dbid_ipv4_lookup_results];
struct datablock_kernel_arg *db_dest_addrs = &datablocks[dbid_ipv4_dest_addrs_d];
struct datablock_kernel_arg *db_results = &datablocks[dbid_ipv4_lookup_results_d];
uint32_t daddr = ((uint32_t*) db_dest_addrs->buffer_bases_in[batch_idx])[item_idx];
uint16_t *lookup_result = &((uint16_t *) db_results->buffer_bases_out[batch_idx])[item_idx];

Expand Down
16 changes: 8 additions & 8 deletions elements/ipsec/IPsecAES_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
#include "../../engines/cuda/compat.hh"

/* The index is given by the order in get_used_datablocks(). */
static const __device__ int dbid_enc_payloads = 0;
static const __device__ int dbid_iv = 1;
static const __device__ int dbid_flow_ids = 2;
static const __device__ int dbid_aes_block_info = 3;
#define dbid_enc_payloads_d (0)
#define dbid_iv_d (1)
#define dbid_flow_ids_d (2)
#define dbid_aes_block_info_d (3)

#ifndef __AES_CORE__ /*same constants are defined in ssl/aes/aes_core.h */
#define __AES_CORE__
Expand Down Expand Up @@ -698,10 +698,10 @@ __global__ void AES_ctr_encrypt_chunk_SharedMem_5(
const uint16_t batch_idx = batch_ids[idx];
const uint16_t item_idx = item_ids[idx];

const struct datablock_kernel_arg *db_enc_payloads = &datablocks[dbid_enc_payloads];
const struct datablock_kernel_arg *db_iv = &datablocks[dbid_iv];
const struct datablock_kernel_arg *db_flow_ids = &datablocks[dbid_flow_ids];
const struct datablock_kernel_arg *db_aes_block_info = &datablocks[dbid_aes_block_info];
const struct datablock_kernel_arg *db_enc_payloads = &datablocks[dbid_enc_payloads_d];
const struct datablock_kernel_arg *db_iv = &datablocks[dbid_iv_d];
const struct datablock_kernel_arg *db_flow_ids = &datablocks[dbid_flow_ids_d];
const struct datablock_kernel_arg *db_aes_block_info = &datablocks[dbid_aes_block_info_d];

assert(batch_idx < 32);
assert(item_idx < db_aes_block_info->item_count_in[batch_idx]);
Expand Down
8 changes: 4 additions & 4 deletions elements/ipsec/IPsecAuthHMACSHA1_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#include "../../engines/cuda/compat.hh"

/* The index is given by the order in get_used_datablocks(). */
static const __device__ int dbid_enc_payloads = 0;
static const __device__ int dbid_flow_ids = 1;
#define dbid_enc_payloads_d (0)
#define dbid_flow_ids_d (1)

#define SHA1_THREADS_PER_BLK 32

Expand Down Expand Up @@ -1248,8 +1248,8 @@ __global__ void computeHMAC_SHA1_3(
const uint16_t batch_idx = batch_ids[idx];
const uint16_t item_idx = item_ids[idx];
assert(item_idx < 64);
const struct datablock_kernel_arg *db_enc_payloads = &datablocks[dbid_enc_payloads];
const struct datablock_kernel_arg *db_flow_ids = &datablocks[dbid_flow_ids];
const struct datablock_kernel_arg *db_enc_payloads = &datablocks[dbid_enc_payloads_d];
const struct datablock_kernel_arg *db_flow_ids = &datablocks[dbid_flow_ids_d];

const uint8_t *enc_payload_base = (uint8_t *) db_enc_payloads->buffer_bases_in[batch_idx];
const uintptr_t offset = (uintptr_t) db_enc_payloads->item_offsets_in[batch_idx][item_idx];
Expand Down
8 changes: 4 additions & 4 deletions elements/ipv6/LookupIP6Route_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ __device__ uint32_t gpu_route_lookup_one(uint64_t ip0, uint64_t ip1,
return bmp;
}

static const __device__ int dbid_ipv6_dest_addrs = 0;
static const __device__ int dbid_ipv6_lookup_results = 1;
#define dbid_ipv6_dest_addrs_d (0)
#define dbid_ipv6_lookup_results_d (1)
struct __kernel_ipv6
{
uint64_t ip0;
Expand All @@ -144,8 +144,8 @@ __global__ void ipv6_route_lookup_cuda(
if (idx < count) {
uint16_t batch_idx = batch_ids[idx];
uint16_t item_idx = item_ids[idx];
struct datablock_kernel_arg *db_dest_addrs = &datablocks[dbid_ipv6_dest_addrs];
struct datablock_kernel_arg *db_results = &datablocks[dbid_ipv6_lookup_results];
struct datablock_kernel_arg *db_dest_addrs = &datablocks[dbid_ipv6_dest_addrs_d];
struct datablock_kernel_arg *db_results = &datablocks[dbid_ipv6_lookup_results_d];
struct __kernel_ipv6 daddr = ((struct __kernel_ipv6*) db_dest_addrs->buffer_bases_in[batch_idx])[item_idx];
uint16_t *lookup_result = &((uint16_t *) db_results->buffer_bases_out[batch_idx])[item_idx];

Expand Down

0 comments on commit 1fad3ee

Please sign in to comment.