From 1fad3ee0e65c7ab16cf5a1fedee665f4b8cdb463 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 26 May 2015 16:19:38 +0900 Subject: [PATCH] refs #6: Prevent potential symbol conflicts * When using heterogeneous GPUs, there are multiple occurrences of CUDA kernels for different PTX assembly versions. I don't know how exactly nvcc treats "static const" variables when generting multiple cubin binaries, but we can just choose NOT to depend on such behaviour for device-side datablock indices. --- elements/ip/IPlookup_kernel.cu | 8 ++++---- elements/ipsec/IPsecAES_kernel.cu | 16 ++++++++-------- elements/ipsec/IPsecAuthHMACSHA1_kernel.cu | 8 ++++---- elements/ipv6/LookupIP6Route_kernel.cu | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/elements/ip/IPlookup_kernel.cu b/elements/ip/IPlookup_kernel.cu index f79bee7..09138eb 100644 --- a/elements/ip/IPlookup_kernel.cu +++ b/elements/ip/IPlookup_kernel.cu @@ -21,8 +21,8 @@ extern "C" { /* The index is given by the order in get_used_datablocks(). */ -static const __device__ int dbid_ipv4_dest_addrs = 0; -static const __device__ int dbid_ipv4_lookup_results = 1; +#define dbid_ipv4_dest_addrs_d (0) +#define dbid_ipv4_lookup_results_d (1) /* The GPU kernel. */ __global__ void ipv4_route_lookup_cuda( @@ -37,8 +37,8 @@ __global__ void ipv4_route_lookup_cuda( if (idx < count) { uint16_t batch_idx = batch_ids[idx]; uint16_t item_idx = item_ids[idx]; - struct datablock_kernel_arg *db_dest_addrs = &datablocks[dbid_ipv4_dest_addrs]; - struct datablock_kernel_arg *db_results = &datablocks[dbid_ipv4_lookup_results]; + struct datablock_kernel_arg *db_dest_addrs = &datablocks[dbid_ipv4_dest_addrs_d]; + struct datablock_kernel_arg *db_results = &datablocks[dbid_ipv4_lookup_results_d]; uint32_t daddr = ((uint32_t*) db_dest_addrs->buffer_bases_in[batch_idx])[item_idx]; uint16_t *lookup_result = &((uint16_t *) db_results->buffer_bases_out[batch_idx])[item_idx]; diff --git a/elements/ipsec/IPsecAES_kernel.cu b/elements/ipsec/IPsecAES_kernel.cu index 9680912..ef54710 100644 --- a/elements/ipsec/IPsecAES_kernel.cu +++ b/elements/ipsec/IPsecAES_kernel.cu @@ -15,10 +15,10 @@ #include "../../engines/cuda/compat.hh" /* The index is given by the order in get_used_datablocks(). */ -static const __device__ int dbid_enc_payloads = 0; -static const __device__ int dbid_iv = 1; -static const __device__ int dbid_flow_ids = 2; -static const __device__ int dbid_aes_block_info = 3; +#define dbid_enc_payloads_d (0) +#define dbid_iv_d (1) +#define dbid_flow_ids_d (2) +#define dbid_aes_block_info_d (3) #ifndef __AES_CORE__ /*same constants are defined in ssl/aes/aes_core.h */ #define __AES_CORE__ @@ -698,10 +698,10 @@ __global__ void AES_ctr_encrypt_chunk_SharedMem_5( const uint16_t batch_idx = batch_ids[idx]; const uint16_t item_idx = item_ids[idx]; - const struct datablock_kernel_arg *db_enc_payloads = &datablocks[dbid_enc_payloads]; - const struct datablock_kernel_arg *db_iv = &datablocks[dbid_iv]; - const struct datablock_kernel_arg *db_flow_ids = &datablocks[dbid_flow_ids]; - const struct datablock_kernel_arg *db_aes_block_info = &datablocks[dbid_aes_block_info]; + const struct datablock_kernel_arg *db_enc_payloads = &datablocks[dbid_enc_payloads_d]; + const struct datablock_kernel_arg *db_iv = &datablocks[dbid_iv_d]; + const struct datablock_kernel_arg *db_flow_ids = &datablocks[dbid_flow_ids_d]; + const struct datablock_kernel_arg *db_aes_block_info = &datablocks[dbid_aes_block_info_d]; assert(batch_idx < 32); assert(item_idx < db_aes_block_info->item_count_in[batch_idx]); diff --git a/elements/ipsec/IPsecAuthHMACSHA1_kernel.cu b/elements/ipsec/IPsecAuthHMACSHA1_kernel.cu index 2fd23cf..f26a6c2 100644 --- a/elements/ipsec/IPsecAuthHMACSHA1_kernel.cu +++ b/elements/ipsec/IPsecAuthHMACSHA1_kernel.cu @@ -9,8 +9,8 @@ #include "../../engines/cuda/compat.hh" /* The index is given by the order in get_used_datablocks(). */ -static const __device__ int dbid_enc_payloads = 0; -static const __device__ int dbid_flow_ids = 1; +#define dbid_enc_payloads_d (0) +#define dbid_flow_ids_d (1) #define SHA1_THREADS_PER_BLK 32 @@ -1248,8 +1248,8 @@ __global__ void computeHMAC_SHA1_3( const uint16_t batch_idx = batch_ids[idx]; const uint16_t item_idx = item_ids[idx]; assert(item_idx < 64); - const struct datablock_kernel_arg *db_enc_payloads = &datablocks[dbid_enc_payloads]; - const struct datablock_kernel_arg *db_flow_ids = &datablocks[dbid_flow_ids]; + const struct datablock_kernel_arg *db_enc_payloads = &datablocks[dbid_enc_payloads_d]; + const struct datablock_kernel_arg *db_flow_ids = &datablocks[dbid_flow_ids_d]; const uint8_t *enc_payload_base = (uint8_t *) db_enc_payloads->buffer_bases_in[batch_idx]; const uintptr_t offset = (uintptr_t) db_enc_payloads->item_offsets_in[batch_idx][item_idx]; diff --git a/elements/ipv6/LookupIP6Route_kernel.cu b/elements/ipv6/LookupIP6Route_kernel.cu index 33db5cf..0a339bf 100644 --- a/elements/ipv6/LookupIP6Route_kernel.cu +++ b/elements/ipv6/LookupIP6Route_kernel.cu @@ -124,8 +124,8 @@ __device__ uint32_t gpu_route_lookup_one(uint64_t ip0, uint64_t ip1, return bmp; } -static const __device__ int dbid_ipv6_dest_addrs = 0; -static const __device__ int dbid_ipv6_lookup_results = 1; +#define dbid_ipv6_dest_addrs_d (0) +#define dbid_ipv6_lookup_results_d (1) struct __kernel_ipv6 { uint64_t ip0; @@ -144,8 +144,8 @@ __global__ void ipv6_route_lookup_cuda( if (idx < count) { uint16_t batch_idx = batch_ids[idx]; uint16_t item_idx = item_ids[idx]; - struct datablock_kernel_arg *db_dest_addrs = &datablocks[dbid_ipv6_dest_addrs]; - struct datablock_kernel_arg *db_results = &datablocks[dbid_ipv6_lookup_results]; + struct datablock_kernel_arg *db_dest_addrs = &datablocks[dbid_ipv6_dest_addrs_d]; + struct datablock_kernel_arg *db_results = &datablocks[dbid_ipv6_lookup_results_d]; struct __kernel_ipv6 daddr = ((struct __kernel_ipv6*) db_dest_addrs->buffer_bases_in[batch_idx])[item_idx]; uint16_t *lookup_result = &((uint16_t *) db_results->buffer_bases_out[batch_idx])[item_idx];