Add array example [skip ci]
maawad committed Jan 24, 2024
1 parent dd668b9 commit c45e2a8
Showing 4 changed files with 106 additions and 0 deletions.
10 changes: 10 additions & 0 deletions CMakeLists.txt
@@ -15,6 +15,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
# options
option(build_benchmarks "Build benchmarks" ON)
option(build_tests "Build tests" ON)
option(build_examples "Build examples" ON)


# Auto-detect GPU architecture, sets ${CUDA_ARCHS}
@@ -29,6 +30,9 @@ set(template_define "${CMAKE_CURRENT_SOURCE_DIR}/outputs")
# bght flags
add_library(bght INTERFACE)

add_library(bght::bght ALIAS bght)


# CUDA and C++ compiler flags
set(CXX_FLAGS
$<IF:$<CXX_COMPILER_ID:MSVC>,
@@ -101,3 +105,9 @@ endif(build_tests)
if(build_benchmarks)
add_subdirectory(benchmarks)
endif(build_benchmarks)



if(build_examples)
add_subdirectory(examples)
endif(build_examples)
12 changes: 12 additions & 0 deletions examples/CMakeLists.txt
@@ -0,0 +1,12 @@
function(add_tree_example target_name source_file)
  add_executable(${target_name} ${source_file})
  target_link_libraries(${target_name} PRIVATE bght::bght)
endfunction()

set(example_targets
  array
)

foreach(target ${example_targets})
  add_tree_example(${target} ${target}.cu)
endforeach()
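
Since this commit also adds the bght::bght alias for the INTERFACE library, a downstream project can link against bght the same way these examples do. Below is a minimal sketch of a hypothetical consumer CMakeLists.txt; the extern/bght path, the project name, and the my_example target are placeholders and not part of this commit.

cmake_minimum_required(VERSION 3.18)
project(bght_consumer LANGUAGES CXX CUDA)

# Optionally skip building bght's own tests, benchmarks, and the new examples.
set(build_tests OFF CACHE BOOL "" FORCE)
set(build_benchmarks OFF CACHE BOOL "" FORCE)
set(build_examples OFF CACHE BOOL "" FORCE)

add_subdirectory(extern/bght)  # defines the bght::bght INTERFACE target

add_executable(my_example my_example.cu)
target_link_libraries(my_example PRIVATE bght::bght)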
82 changes: 82 additions & 0 deletions examples/array.cu
@@ -0,0 +1,82 @@
#include <thrust/device_vector.h>
#include <cuda/std/array>

#include <cstdint>

#include <bcht.hpp>

// Testing passing a hashmap to the device
template <typename HashMap, typename Keys>
__global__ void test_kernel(HashMap map, Keys* keys) {
  using pair_type = typename HashMap::value_type;
  auto thread_id = threadIdx.x + blockIdx.x * blockDim.x;

  // partition the block into tiles of bucket_size threads
  auto block = cooperative_groups::this_thread_block();
  auto tile = cooperative_groups::tiled_partition<HashMap::bucket_size>(block);

  // pair to insert
  auto key_id = thread_id / HashMap::bucket_size;
  const auto key = keys[key_id];
  const auto value = static_cast<typename HashMap::mapped_type>(key[0] * 10);

  if (tile.thread_rank() == 0) {
    printf("inserting keys[%u] = %i, value %i\n", key_id, key[0], value);
  }

  pair_type pair{key, value};

  map.insert(pair, tile);

  auto find_result = map.find(pair.first, tile);

  if (tile.thread_rank() == 0) {
    printf("value for keys[%u] = %i\n", key_id, find_result);
  }
}

struct custom_key_hash {
  using key_type = cuda::std::array<std::uint8_t, 20>;
  using result_type = std::size_t;
  constexpr custom_key_hash(uint32_t hash_x, uint32_t hash_y)
      : hash_x_(hash_x), hash_y_(hash_y) {}

  // just hash the first entry
  constexpr result_type __host__ __device__ operator()(const key_type& key) const {
    return (((hash_x_ ^ key[0]) + hash_y_) % prime_divisor);
  }

  custom_key_hash(const custom_key_hash&) = default;
  custom_key_hash() = default;
  custom_key_hash(custom_key_hash&&) = default;
  custom_key_hash& operator=(custom_key_hash const&) = default;
  custom_key_hash& operator=(custom_key_hash&&) = default;
  ~custom_key_hash() = default;
  static constexpr uint32_t prime_divisor = 4294967291u;

 private:
  uint32_t hash_x_;
  uint32_t hash_y_;
};

int main() {
  using Config = cuda::std::array<std::uint8_t, 20>;
  using V = int;

  const auto sentinel_key = Config{0, 0, 0};
  const auto sentinel_value = 0;

  const std::size_t capacity = 5;

  thrust::device_vector<Config> keys(2);
  keys[0] = cuda::std::array<std::uint8_t, 20>{1};
  keys[1] = cuda::std::array<std::uint8_t, 20>{2};

  bght::bcht<Config, V, custom_key_hash> table(capacity, sentinel_key, sentinel_value);

  // For simplicity, launch one block per key and set the block size to the tile/bucket size
  const auto block_size = decltype(table)::bucket_size;
  test_kernel<<<keys.size(), block_size>>>(table, keys.data().get());

  cuda_try(cudaDeviceSynchronize());
}
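
The example reports its lookups with printf from inside the kernel. A variation that is easier to check programmatically is sketched below; it assumes the same includes and the same device-side map.find(key, tile) API already used in array.cu, and find_kernel and results are illustrative names, not part of this commit.

// Sketch of a find-and-store variant of test_kernel, meant to sit next to it
// in array.cu and be launched with the same one-block-per-key configuration.
template <typename HashMap, typename Keys, typename Values>
__global__ void find_kernel(HashMap map, Keys* keys, Values* results) {
  auto thread_id = threadIdx.x + blockIdx.x * blockDim.x;
  auto block = cooperative_groups::this_thread_block();
  auto tile = cooperative_groups::tiled_partition<HashMap::bucket_size>(block);

  auto key_id = thread_id / HashMap::bucket_size;
  auto result = map.find(keys[key_id], tile);  // cooperative lookup by the whole tile

  if (tile.thread_rank() == 0) {
    results[key_id] = result;  // one thread per tile records the found value
  }
}

// Host side, after the test_kernel launch in main():
//   thrust::device_vector<V> results(keys.size());
//   find_kernel<<<keys.size(), block_size>>>(table, keys.data().get(),
//                                            results.data().get());
//   cuda_try(cudaDeviceSynchronize());
//   // The example stores key[0] * 10 as the value, so results should read back 10 and 20.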
2 changes: 2 additions & 0 deletions include/detail/cuda_helpers.cuh
@@ -15,6 +15,8 @@
*/

#pragma once
#include <iostream>

namespace bght {
#define _device_ __device__ __forceinline__
#define _host_device_ __device__ __host__ __forceinline__
