Add iceberg w/ double hashing (#19)

* Add iterators functions * Add missing include * Add IHT with double hashing * Change default hash function * Change default hash function * Add results using IDHT * Modernize cmake * Add examples directory * Fixup repo header * Cleanup * Remove outdated results vs. cuco * Fix link * Add results from 2080
owensgroup · Jan 24, 2024 · f0313cf · f0313cf
1 parent dff559c
commit f0313cf
Show file tree

Hide file tree

Showing 84 changed files with 119,391 additions and 2,699 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,8 +1,4 @@
 cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
-if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.8)
-  cmake_policy(SET CMP0104 OLD)
-endif()
-
 
 project(bght LANGUAGES CXX CUDA)
 
@@ -17,18 +13,13 @@ option(build_benchmarks "Build benchmarks" ON)
 option(build_tests "Build tests" ON)
 option(build_examples "Build examples" ON)
 
-
-# Auto-detect GPU architecture, sets ${CUDA_ARCHS}
-include("cmake/AutoDetectCudaArch.cmake")
-
 # Direct all output to /bin directory
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
 
-set(template_define "${CMAKE_CURRENT_SOURCE_DIR}/outputs")
-
 
 # bght flags
 add_library(bght INTERFACE)
+add_library(bght::bght ALIAS bght)
 
 add_library(bght::bght ALIAS bght)
 
@@ -49,7 +40,6 @@ set(CUDA_FLAGS
     $<IF:$<CONFIG:Debug>,
         -G -src-in-ptx,
         -lineinfo>
-    ${CUDA_ARCHS}
 	--expt-extended-lambda
     -extended-lambda
     -Wno-deprecated-gpu-targets #silence deprecated architectures

diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # [BGHT: Better GPU Hash Tables](https://owensgroup.github.io/BGHT/)
 
-| [**Documentation**](https://owensgroup.github.io/BGHT/) | [**Examples/Tests**](https://github.com/owensgroup/BGHT/tree/main/test)  | [**Benchmarks**](https://github.com/owensgroup/BGHT/tree/main/benchmarks) | [**Results**](https://github.com/owensgroup/BGHT/blob/main/results.md) |
-|--------------|----------------------|-------------------|-------------------|
+| [**Documentation**](https://owensgroup.github.io/BGHT/) | [**Tests**](https://github.com/owensgroup/BGHT/tree/main/test)  | [**Examples**](https://github.com/owensgroup/BGHT/tree/main/examples)  |  [**Benchmarks**](https://github.com/owensgroup/BGHT/tree/main/benchmarks) | [**Results**](https://github.com/owensgroup/BGHT/blob/main/results.md) |
+|--------------|----------------------|-------------------|-------------------|-------------------|
 
 BGHT is a collection of high-performance static GPU hash tables. BGHT contains hash tables that use three different probing schemes: 1) bucketed cuckoo, 2) power-of-two, 3) iceberg hashing. Our bucketed static cuckoo hash table is the state-of-the-art static hash table.
 For more information, please check our papers:
@@ -16,7 +16,7 @@ For more information, please check our papers:
 * Standard-like APIs
 
 ## How to use
-BGHT is a header-only library. To use the library, you can add it as a submodule or use [CMake Package Manager (CPM)](https://github.com/cpm-cmake/CPM.cmake) to fetch the library into your CMake-based project ([complete example](https://github.com/owensgroup/BGHT/tree/main/test/cpm)).
+BGHT is a header-only library. To use the library, you can add it as a submodule or use [CMake Package Manager (CPM)](https://github.com/cpm-cmake/CPM.cmake) to fetch the library into your CMake-based project ([complete example](https://github.com/owensgroup/BGHT/tree/main/examples/cpm)).
 ```
 cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
 CPMAddPackage(
@@ -146,10 +146,6 @@ To reproduce the results, follow the following [steps](reproduce.md). You can al
 ## Benchmarks
 Please check our [paper](https://arxiv.org/abs/2108.07232) for comprehensive analysis and benchmarks. Also, see the following steps to [reproduce](reproduce.md) the results.
 
-An additional comparison of our BCHT to `cucCollection`'s `cuco::static_map` is shown below. The comparison is between our BCHT with B = 16 (default configuration) and `cuco::static_map`. Input keys (50 million pairs) are uniformly distributed unsigned keys. The benchmarking was performed on an NVIDIA Titan V GPU (higher is better):
-
-![](/figs/arxiv/NVIDIA-TITAN-V/bcht_vs_cuco.svg)
-
 ## Questions and bug report
 Please create an issue. We will welcome any contributions that improve the usability and quality of our repository.
 

diff --git a/cmake/AutoDetectCudaArch.cmake b/cmake/AutoDetectCudaArch.cmake
diff --git a/cmake/DownloadGoogleTest.cmake b/cmake/DownloadGoogleTest.cmake
diff --git a/cmake/FindGoogleTest.cmake b/cmake/FindGoogleTest.cmake
diff --git a/test/test_1cht.cu → examples/1cht_example.cu b/test/test_1cht.cu → examples/1cht_example.cu
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -4,7 +4,12 @@ function(add_tree_example target_name source_file)
 endfunction()
 
 set(example_targets
-    array
+    array_example
+    custom_allocator_example
+    1cht_example
+    bcht_example
+    custom_types_example
+    iht_example
     )
 
 foreach(target ${example_targets})

diff --git a/examples/array.cu → examples/array_example.cu b/examples/array.cu → examples/array_example.cu
diff --git a/test/test_bcht.cu → examples/bcht_example.cu b/test/test_bcht.cu → examples/bcht_example.cu
diff --git a/test/cpm/CMakeLists.txt → examples/cpm/CMakeLists.txt b/test/cpm/CMakeLists.txt → examples/cpm/CMakeLists.txt
diff --git a/test/cpm/test_cpm.cu → examples/cpm/test_cpm.cu b/test/cpm/test_cpm.cu → examples/cpm/test_cpm.cu
diff --git a/test/test_allocator.cu → examples/custom_allocator_example.cu b/test/test_allocator.cu → examples/custom_allocator_example.cu
diff --git a/test/test_types.cu → examples/custom_types_example.cu b/test/test_types.cu → examples/custom_types_example.cu
diff --git a/examples/iht_example.cu b/examples/iht_example.cu
@@ -0,0 +1,148 @@
+/*
+ *   Copyright 2021 The Regents of the University of California, Davis
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ */
+
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <thrust/sequence.h>
+#include <cmd.hpp>
+#include <gpu_timer.hpp>
+#include <iht.hpp>
+#include <limits>
+#include <perf_report.hpp>
+#include <rkg.hpp>
+#include <type_traits>
+
+/**
+ * @brief Inserts randomly generated unique keys into a hash map, queries every inserted
+ * key, reports timings, and validates the query results on the host.
+ *
+ * Recognized command-line arguments (read through `get_arg_value`):
+ *   - "num-keys":    number of key-value pairs to insert and query (default 16)
+ *   - "load-factor": used to derive the table capacity as num_keys / load_factor
+ *                    (default 0.9); must be greater than zero
+ *   - "device":      device id passed to `bght::set_device` (default 0)
+ *   - "b":           bucket size; only echoed to stdout here, the actual bucket size is
+ *                    fixed by the `hashmap` template argument (default 16)
+ *
+ * Each key `x` is stored with the value `x * 10`; validation recomputes that value on
+ * the host and compares it against what `find` produced.
+ *
+ * Calls `std::terminate()` if `insert` reports failure. Prints "Success" when every
+ * query returned the expected value, otherwise prints the first mismatch and returns.
+ *
+ * @tparam hashmap   Hash map class template taking <key, value, threshold>
+ * @tparam threshold Threshold forwarded as the third template argument of `hashmap`
+ * @param argc Argument count as received by `main`
+ * @param argv Argument vector as received by `main`
+ */
+template <template <typename, typename, int> class hashmap, int threshold>
+void do_test(int argc, char** argv) {
+  using key_type = uint32_t;
+  using value_type = uint32_t;
+
+  auto arguments = std::vector<std::string>(argv, argv + argc);
+  std::size_t num_keys =
+      get_arg_value<std::size_t>(arguments, "num-keys").value_or(16ull);
+  double load_factor = get_arg_value<double>(arguments, "load-factor").value_or(0.9);
+  int device = get_arg_value<int>(arguments, "device").value_or(0);
+  int bucket_size = get_arg_value<int>(arguments, "b").value_or(16);
+
+  std::cout << "num-keys: " << num_keys << '\n';
+  std::cout << "load-factor: " << load_factor << '\n';
+  std::cout << "bucket-size: " << bucket_size << '\n';
+  std::cout << "threshold: " << threshold << '\n';
+
+  // A zero, negative, or NaN load factor would make the capacity computation below
+  // convert inf/NaN/negative to std::size_t, which is undefined behavior.
+  if (!(load_factor > 0.0)) {
+    std::cout << "load-factor must be greater than zero\n";
+    return;
+  }
+
+  bght::set_device(device);
+
+  std::size_t capacity = double(num_keys) / load_factor;
+
+  // Sentinels handed to the hash map constructor.
+  auto invalid_key = std::numeric_limits<key_type>::max();
+  auto invalid_value = std::numeric_limits<value_type>::max();
+
+  using pair_type = bght::pair<key_type, value_type>;
+
+  std::vector<key_type> h_keys;
+  std::vector<value_type> h_values;
+  rkg::generate_uniform_unique_pairs(h_keys, h_values, num_keys);
+
+  thrust::device_vector<key_type> d_keys(num_keys);
+  thrust::device_vector<pair_type> d_pairs(num_keys);
+  thrust::device_vector<key_type> d_queries(num_keys);
+  thrust::device_vector<value_type> d_results(num_keys);
+
+  d_keys = h_keys;
+
+  // assign values: every key x is paired with x * 10
+  auto to_pair = [] __host__ __device__(key_type x) { return pair_type{x, x * 10}; };
+  thrust::transform(
+      thrust::device, d_keys.begin(), d_keys.end(), d_pairs.begin(), to_pair);
+
+  // prepare queries: look up exactly the keys that get inserted
+  d_queries = d_keys;
+
+  hashmap<key_type, value_type, threshold> test(capacity, invalid_key, invalid_value);
+
+  auto input_start = d_pairs.data().get();
+  auto input_last = input_start + num_keys;
+
+  auto queries_start = d_queries.data().get();
+  auto queries_last = queries_start + num_keys;
+  auto output_start = d_results.data().get();
+
+  cudaStream_t stream;
+  cuda_try(cudaStreamCreate(&stream));
+
+  // Releases the stream on every return path. It is declared before the timers so that
+  // it is destroyed after them (locals are destroyed in reverse declaration order).
+  struct stream_guard {
+    cudaStream_t handle;
+    ~stream_guard() { cudaStreamDestroy(handle); }
+  } stream_owner{stream};
+
+  gpu_timer insertion_timer(stream);
+  insertion_timer.start_timer();
+  auto insertion_success = test.insert(input_start, input_last, stream);
+  insertion_timer.stop_timer();
+  auto insertion_s = insertion_timer.get_elapsed_s();
+
+  cuda_try(cudaStreamSynchronize(stream));
+
+  gpu_timer find_timer(stream);
+  find_timer.start_timer();
+  test.find(queries_start, queries_last, output_start, stream);
+  find_timer.stop_timer();
+  auto find_s = find_timer.get_elapsed_s();
+
+  cuda_try(cudaDeviceSynchronize());
+
+  // Compute stats
+  if (!insertion_success) {
+    std::cout << "Insertion failed\n";
+    std::terminate();
+  }
+  std_cout_perf_report(insertion_s, find_s, num_keys, num_keys);
+
+  // validation: recompute the expected value for each query on the host
+  thrust::host_vector<value_type> h_results = d_results;
+  thrust::host_vector<value_type> h_queries = d_queries;
+  for (std::size_t i = 0; i < num_keys; i++) {
+    auto key = h_queries[i];
+    auto expected_pair = to_pair(key);
+    auto found_result = h_results[i];
+    if (expected_pair.second != found_result) {
+      std::cout << "Error: expected: " << expected_pair.second;
+      std::cout << ", found: " << found_result << '\n';
+      return;
+    }
+  }
+  std::cout << "Success\n";
+}
+/**
+ * @brief Entry point: selects the hash map instantiation from the command line and runs
+ * `do_test` with it.
+ *
+ * Recognized arguments:
+ *   - "b": bucket size, one of 8, 16 or 32 (default 16)
+ *   - "t": threshold selector (default 12). A value of 0 runs with threshold 0; any
+ *          other value runs with the fixed threshold for the chosen bucket size
+ *          (6 for b=8, 12 for b=16, 25 for b=32), because the threshold is a
+ *          compile-time template argument.
+ *
+ * @return 0 after `do_test` returns, 1 if the bucket size is not supported.
+ */
+int main(int argc, char** argv) {
+  auto arguments = std::vector<std::string>(argv, argv + argc);
+  const int bucket_size = get_arg_value<int>(arguments, "b").value_or(16);
+  const int threshold = get_arg_value<int>(arguments, "t").value_or(12);
+  const bool zero_threshold = (threshold == 0);
+
+  switch (bucket_size) {
+    case 8:
+      if (zero_threshold) {
+        do_test<iht8, 0>(argc, argv);
+      } else {
+        do_test<iht8, 6>(argc, argv);
+      }
+      break;
+    case 16:
+      if (zero_threshold) {
+        do_test<iht16, 0>(argc, argv);
+      } else {
+        do_test<iht16, 12>(argc, argv);
+      }
+      break;
+    case 32:
+      if (zero_threshold) {
+        do_test<iht32, 0>(argc, argv);
+      } else {
+        do_test<iht32, 25>(argc, argv);
+      }
+      break;
+    default:
+      std::cout << "Not supported bucket size\n";
+      // Signal the failure to the caller instead of exiting with status 0.
+      return 1;
+  }
+  return 0;
+}