diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c3a5bc..56d2d1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE) # options option(build_benchmarks "Build benchmarks" ON) option(build_tests "Build tests" ON) +option(build_examples "Build examples" ON) # Auto-detect GPU architecture, sets ${CUDA_ARCHS} @@ -29,6 +30,9 @@ set(template_define "${CMAKE_CURRENT_SOURCE_DIR}/outputs") # bght flags add_library(bght INTERFACE) +add_library(bght::bght ALIAS bght) + + # CUDA and C++ compiler flags set(CXX_FLAGS $, @@ -101,3 +105,9 @@ endif(build_tests) if(build_benchmarks) add_subdirectory(benchmarks) endif(build_benchmarks) + + + +if(build_examples) + add_subdirectory(examples) +endif(build_examples) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..2eb69bc --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,12 @@ +function(add_tree_example target_name source_file) + add_executable(${target_name} ${source_file}) + target_link_libraries(${target_name} PRIVATE bght::bght) +endfunction() + +set(example_targets + array + ) + +foreach(target ${example_targets}) + add_tree_example(${target} ${target}.cu) +endforeach() \ No newline at end of file diff --git a/examples/array.cu b/examples/array.cu new file mode 100644 index 0000000..b947763 --- /dev/null +++ b/examples/array.cu @@ -0,0 +1,82 @@ +#include +#include + +#include + +#include + +// Testing passing a hashmap to the device +template +__global__ void test_kernel(HashMap map, Keys* keys) { + using pair_type = typename HashMap::value_type; + auto thread_id = threadIdx.x + blockIdx.x * blockDim.x; + + // tile + auto block = cooperative_groups::this_thread_block(); + auto tile = cooperative_groups::tiled_partition(block); + + // pair to insert + auto key_id = thread_id / HashMap::bucket_size; + const auto key = keys[key_id]; + const auto value = static_cast(key[0] * 10); + + if (tile.thread_rank() == 0) { + printf("inserting keys[%i] = %i, value %i\n", key_id, key[0], value); + } + + pair_type pair{key, value}; + + map.insert(pair, tile); + + auto find_result = map.find(pair.first, tile); + + if (tile.thread_rank() == 0) { + printf("value for keys[%i] = %i\n", key_id, find_result); + } +} + +struct custom_key_hash { + using key_type = cuda::std::array; + using result_type = std::size_t; + constexpr custom_key_hash(uint32_t hash_x, uint32_t hash_y) + : hash_x_(hash_x), hash_y_(hash_y) {} + + // just hash the first entry + constexpr result_type __host__ __device__ operator()(const key_type& key) const { + return (((hash_x_ ^ key[0]) + hash_y_) % prime_divisor); + } + + custom_key_hash(const custom_key_hash&) = default; + custom_key_hash() = default; + custom_key_hash(custom_key_hash&&) = default; + custom_key_hash& operator=(custom_key_hash const&) = default; + custom_key_hash& operator=(custom_key_hash&&) = default; + ~custom_key_hash() = default; + static constexpr uint32_t prime_divisor = 4294967291u; + + private: + uint32_t hash_x_; + uint32_t hash_y_; +}; + +int main() { + using Config = cuda::std::array; + using V = int; + + const auto sentinel_key = Config{0, 0, 0}; + const auto sentinel_value = 0; + + const std::size_t capacity = 5; + + thrust::device_vector keys(2); + keys[0] = cuda::std::array{1}; + keys[1] = cuda::std::array{2}; + + bght::bcht table(capacity, sentinel_key, sentinel_value); + + // for simplicity launch one block per key and set the block size to tile/bucket size + const auto block_size = bght::bcht::bucket_size; + test_kernel<<>>(table, keys.data().get()); + + cuda_try(cudaDeviceSynchronize()); +} \ No newline at end of file diff --git a/include/detail/cuda_helpers.cuh b/include/detail/cuda_helpers.cuh index e54a78d..5c82410 100644 --- a/include/detail/cuda_helpers.cuh +++ b/include/detail/cuda_helpers.cuh @@ -15,6 +15,8 @@ */ #pragma once +#include + namespace bght { #define _device_ __device__ __forceinline__ #define _host_device_ __device__ __host__ __forceinline__