Skip to content

Commit

Permalink
Merge pull request kstppd#55 from markusbattarbee/pass_info
Browse files Browse the repository at this point in the history
Pass info
  • Loading branch information
kstppd authored May 30, 2024
2 parents 100c3c1 + c2f7abd commit 426a1c8
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 74 deletions.
61 changes: 30 additions & 31 deletions include/hashinator/hashers.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,18 @@ class Hasher {

public:
// Overload with separate input for keys and values.
static void insert(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err,
static void insert(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len,
split_gpuStream_t s = 0) {
size_t blocks, blockSize;
*err = status::success;
info->err = status::success;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::insert_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, vals, buckets, sizePower, maxoverflow, d_overflow, d_fill, len, err);
<<<blocks, blockSize, 0, s>>>(keys, vals, buckets, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
#ifndef NDEBUG
if (*err == status::fail) {
if (info->err == status::fail) {
std::cerr << "***** Hashinator Runtime Warning ********" << std::endl;
std::cerr << "Warning: Hashmap completely overflown in Device Insert.\nNot all elements were "
"inserted!\nConsider resizing before calling insert"
Expand All @@ -65,17 +65,17 @@ class Hasher {
}

// Overload with input for keys only, using the index as the value
static void insertIndex(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower, size_t maxoverflow,
size_t* d_overflow, size_t* d_fill, size_t len, status* err, split_gpuStream_t s = 0) {
static void insertIndex(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, Hashinator::Info* info,
size_t len, split_gpuStream_t s = 0) {
size_t blocks, blockSize;
*err = status::success;
info->err = status::success;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::insert_index_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, buckets, sizePower, maxoverflow, d_overflow, d_fill, len, err);
<<<blocks, blockSize, 0, s>>>(keys, buckets, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
#ifndef NDEBUG
if (*err == status::fail) {
if (info->err == status::fail) {
std::cerr << "***** Hashinator Runtime Warning ********" << std::endl;
std::cerr << "Warning: Hashmap completely overflown in Device InsertIndex.\nNot all elements were "
"inserted!\nConsider resizing before calling insert"
Expand All @@ -87,18 +87,17 @@ class Hasher {

// Overload with hash_pair<key,val> (k,v) inputs
// Used by the tombstone cleaning method.
static void insert(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err,
split_gpuStream_t s = 0) {
static void insert(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {
size_t blocks, blockSize;
*err = status::success;
info->err = status::success;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::insert_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, buckets, sizePower, maxoverflow, d_overflow, d_fill, len, err);
<<<blocks, blockSize, 0, s>>>(src, buckets, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
#ifndef NDEBUG
if (*err == status::fail) {
if (info->err == status::fail) {
std::cerr << "***** Hashinator Runtime Warning ********" << std::endl;
std::cerr << "Warning: Hashmap completely overflown in Device Insert.\nNot all elements were "
"inserted!\nConsider resizing before calling insert"
Expand All @@ -109,56 +108,56 @@ class Hasher {
}

// Retrieve wrapper
static void retrieve(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {
static void retrieve(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {

size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
retrieve_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE, elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, vals, buckets, sizePower, maxoverflow);
<<<blocks, blockSize, 0, s>>>(keys, vals, buckets, info);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

static void retrieve(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {
static void retrieve(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {

size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
retrieve_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE, elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, buckets, sizePower, maxoverflow);
<<<blocks, blockSize, 0, s>>>(src, buckets, info);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Delete wrapper
static void erase(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, size_t* d_tombstoneCounter, int sizePower,
size_t maxoverflow, size_t len, split_gpuStream_t s = 0) {
static void erase(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {

size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::delete_kernel<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, TOMBSTONE, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(keys, buckets, d_tombstoneCounter, sizePower, maxoverflow, len);
<<<blocks, blockSize, 0, s>>>(keys, buckets, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Reset wrapper
static void reset(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst, const int sizePower,
size_t maxoverflow, Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {
static void reset(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst,
Hashinator::Info* info, size_t len, split_gpuStream_t s = 0) {
size_t blocks, blockSize;
launchParams(len, blocks, blockSize);
Hashinator::Hashers::reset_to_empty<KEY_TYPE, VAL_TYPE, EMPTYBUCKET, HashFunction, defaults::WARPSIZE,
elementsPerWarp>
<<<blocks, blockSize, 0, s>>>(src, dst, sizePower, maxoverflow, info, len);
<<<blocks, blockSize, 0, s>>>(src, dst, info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

// Reset wrapper for all elements
static void reset_all(hash_pair<KEY_TYPE, VAL_TYPE>* dst, Hashinator::Info* info, size_t len,
split_gpuStream_t s = 0) {
size_t blocksNeeded = len / defaults::MAX_BLOCKSIZE;
blocksNeeded = blocksNeeded + (blocksNeeded == 0);
// fast ceil for positive ints
size_t blocksNeeded = len / defaults::MAX_BLOCKSIZE + (len % defaults::MAX_BLOCKSIZE != 0);
reset_all_to_empty<KEY_TYPE, VAL_TYPE, EMPTYBUCKET>
<<<blocksNeeded, defaults::MAX_BLOCKSIZE, 0, s>>>(dst, info, len);
<<<blocksNeeded, defaults::MAX_BLOCKSIZE, 0, s>>>(dst,info, len);
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
}

Expand Down
23 changes: 8 additions & 15 deletions include/hashinator/hashinator.h
Original file line number Diff line number Diff line change
Expand Up @@ -1281,11 +1281,9 @@ class Hashmap {
}
// If we do have overflown elements we put them back in the buckets
SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
DeviceHasher::reset(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
_mapInfo, nOverflownElements, s);
DeviceHasher::reset(overflownElements, buckets.data(), _mapInfo, nOverflownElements, s);

DeviceHasher::insert(overflownElements, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, nOverflownElements, &_mapInfo->err, s);
DeviceHasher::insert(overflownElements, buckets.data(), _mapInfo, nOverflownElements, s);

SPLIT_CHECK_ERR(split_gpuFreeAsync(overflownElements, s));
return;
Expand All @@ -1308,8 +1306,7 @@ class Hashmap {
resize(neededPowerSize, targets::device, s);
}
_mapInfo->currentMaxBucketOverflow = _mapInfo->currentMaxBucketOverflow;
DeviceHasher::insert(keys, vals, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, len, &_mapInfo->err, s);
DeviceHasher::insert(keys, vals, buckets.data(), _mapInfo, len, s);
return;
}

Expand All @@ -1330,8 +1327,7 @@ class Hashmap {
resize(neededPowerSize, targets::device, s);
}
_mapInfo->currentMaxBucketOverflow = _mapInfo->currentMaxBucketOverflow;
DeviceHasher::insertIndex(keys, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, len, &_mapInfo->err, s);
DeviceHasher::insertIndex(keys, buckets.data(), _mapInfo, len, s);
return;
}

Expand All @@ -1350,8 +1346,7 @@ class Hashmap {
if (neededPowerSize > _mapInfo->sizePower) {
resize(neededPowerSize, targets::device, s);
}
DeviceHasher::insert(src, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow,
&_mapInfo->currentMaxBucketOverflow, &_mapInfo->fill, len, &_mapInfo->err, s);
DeviceHasher::insert(src, buckets.data(), _mapInfo, len, s);
return;
}

Expand All @@ -1361,8 +1356,7 @@ class Hashmap {
if constexpr (prefetches) {
buckets.optimizeGPU(s);
}
DeviceHasher::retrieve(keys, vals, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow, len,
s);
DeviceHasher::retrieve(keys, vals, buckets.data(), _mapInfo, len, s);
return;
}

Expand All @@ -1372,7 +1366,7 @@ class Hashmap {
if constexpr (prefetches) {
buckets.optimizeGPU(s);
}
DeviceHasher::retrieve(src, buckets.data(), _mapInfo->sizePower, _mapInfo->currentMaxBucketOverflow, len, s);
DeviceHasher::retrieve(src, buckets.data(), _mapInfo, len, s);
return;
}

Expand All @@ -1384,8 +1378,7 @@ class Hashmap {
}
// Remember the last number of tombstones
size_t tbStore = tombstone_count();
DeviceHasher::erase(keys, buckets.data(), &_mapInfo->tombstoneCounter, _mapInfo->sizePower,
_mapInfo->currentMaxBucketOverflow, len, s);
DeviceHasher::erase(keys, buckets.data(), _mapInfo, len, s);
size_t tombstonesAdded = tombstone_count() - tbStore;
// Fill should be decremented by the number of tombstones added;
_mapInfo->fill -= tombstonesAdded;
Expand Down
52 changes: 38 additions & 14 deletions include/hashinator/kernels_AMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,15 @@ template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::nume
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void reset_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* dst,
const int sizePower, size_t maxoverflow, Hashinator::Info* info ,size_t len)
Hashinator::Info* info ,size_t len)

{
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
const size_t w_tid = tid % VIRTUALWARP;
const int sizePower = info->sizePower;
//const size_t maxoverflow = info->currentMaxBucketOverflow;

// Early quit if we have more warps than elements to insert
if (wid >= len) {
Expand Down Expand Up @@ -134,9 +136,14 @@ __global__ void reset_to_empty(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void insert_kernel(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err) {

__global__ void insert_kernel(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len) {

size_t* d_overflow = &(info->currentMaxBucketOverflow);
size_t* d_fill = &(info->fill);
//status* err = &(info->err);
const int sizePower = info->sizePower;
//const size_t maxoverflow = info->currentMaxBucketOverflow;
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
Expand Down Expand Up @@ -265,9 +272,13 @@ __global__ void insert_kernel(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void insert_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err) {
__global__ void insert_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len) {

const int sizePower = info->sizePower;
size_t* d_overflow = &(info->currentMaxBucketOverflow);
size_t* d_fill = &(info->fill);
//status* err = &(info->err);
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
Expand Down Expand Up @@ -401,8 +412,8 @@ __global__ void insert_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
KEY_TYPE TOMBSTONE = EMPTYBUCKET - 1, class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>,
int WARPSIZE = defaults::WARPSIZE, int elementsPerWarp>
__global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, size_t* d_tombstoneCounter,
int sizePower, size_t maxoverflow, size_t len) {
__global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len) {

const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
Expand All @@ -411,6 +422,10 @@ __global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buc
const size_t proper_w_tid = tid % WARPSIZE; // the proper WID as if we had no Virtual warps
const size_t proper_wid = tid / WARPSIZE;
const size_t blockWid = proper_wid % (WARPSIZE / 4); // we have twice the warpsize and half the warps per block
const int sizePower = info->sizePower;
const size_t maxoverflow = info->currentMaxBucketOverflow;
size_t* d_tombstoneCounter = &(info->tombstoneCounter);
//status* err = &(info->err);

__shared__ uint32_t deleteMask[WARPSIZE / 2];

Expand Down Expand Up @@ -492,9 +507,14 @@ __global__ void delete_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buc
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void insert_index_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow, size_t* d_overflow, size_t* d_fill, size_t len, status* err) {

__global__ void insert_index_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info, size_t len) {

size_t* d_overflow = &(info->currentMaxBucketOverflow);
size_t* d_fill = &(info->fill);
//status* err = &(info->err);
const int sizePower = info->sizePower;
//const size_t maxoverflow = info->currentMaxBucketOverflow;
const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
Expand Down Expand Up @@ -627,13 +647,15 @@ __global__ void insert_index_kernel(KEY_TYPE* keys, hash_pair<KEY_TYPE, VAL_TYPE
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void retrieve_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets, int sizePower,
size_t maxoverflow) {
__global__ void retrieve_kernel(KEY_TYPE* keys, VAL_TYPE* vals, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
Hashinator::Info* info) {

const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
const size_t w_tid = tid % VIRTUALWARP;
const int sizePower = info->sizePower;
const size_t maxoverflow = info->currentMaxBucketOverflow;

uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint64_t submask;
Expand Down Expand Up @@ -682,12 +704,14 @@ template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::nume
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>, int WARPSIZE = defaults::WARPSIZE,
int elementsPerWarp>
__global__ void retrieve_kernel(hash_pair<KEY_TYPE, VAL_TYPE>* src, hash_pair<KEY_TYPE, VAL_TYPE>* buckets,
int sizePower, size_t maxoverflow) {
Hashinator::Info* info) {

const int VIRTUALWARP = WARPSIZE / elementsPerWarp;
const size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
const size_t wid = tid / VIRTUALWARP;
const size_t w_tid = tid % VIRTUALWARP;
const int sizePower = info->sizePower;
const size_t maxoverflow = info->currentMaxBucketOverflow;

uint64_t subwarp_relative_index = (wid) % (WARPSIZE / VIRTUALWARP);
uint64_t submask;
Expand Down
Loading

0 comments on commit 426a1c8

Please sign in to comment.