diff --git a/tests/find_or_insert_test.cc.cu b/tests/find_or_insert_test.cc.cu index c87e22912..362cea2e9 100644 --- a/tests/find_or_insert_test.cc.cu +++ b/tests/find_or_insert_test.cc.cu @@ -1737,28 +1737,19 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) { options.max_hbm_for_vectors = nv::merlin::GB(max_hbm_for_vectors); options.evict_strategy = nv::merlin::EvictStrategy::kCustomized; - K* h_keys_base; - M* h_metas_base; - V* h_vectors_base; + K* h_keys_base = test_util::HostBuffer(BATCH_SIZE).ptr(); + M* h_metas_base = test_util::HostBuffer(BATCH_SIZE).ptr(); + V* h_vectors_base = test_util::HostBuffer(BATCH_SIZE * options.dim).ptr(); - K* h_keys_temp; - M* h_metas_temp; - V* h_vectors_temp; + K* h_keys_temp = test_util::HostBuffer(MAX_CAPACITY).ptr(); + M* h_metas_temp = test_util::HostBuffer(MAX_CAPACITY).ptr(); + V* h_vectors_temp = + test_util::HostBuffer(MAX_CAPACITY * options.dim).ptr(); K* d_keys_temp; M* d_metas_temp = nullptr; V* d_vectors_temp; - CUDA_CHECK(cudaMallocHost(&h_keys_base, BATCH_SIZE * sizeof(K))); - CUDA_CHECK(cudaMallocHost(&h_metas_base, BATCH_SIZE * sizeof(M))); - CUDA_CHECK( - cudaMallocHost(&h_vectors_base, BATCH_SIZE * sizeof(V) * options.dim)); - - CUDA_CHECK(cudaMallocHost(&h_keys_temp, MAX_CAPACITY * sizeof(K))); - CUDA_CHECK(cudaMallocHost(&h_metas_temp, MAX_CAPACITY * sizeof(M))); - CUDA_CHECK( - cudaMallocHost(&h_vectors_temp, MAX_CAPACITY * sizeof(V) * options.dim)); - CUDA_CHECK(cudaMalloc(&d_keys_temp, MAX_CAPACITY * sizeof(K))); CUDA_CHECK(cudaMalloc(&d_metas_temp, MAX_CAPACITY * sizeof(M))); CUDA_CHECK( @@ -1813,26 +1804,29 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) { MAX_CAPACITY, 0, d_keys_temp, d_vectors_temp, d_metas_temp, stream); CUDA_CHECK(cudaMemcpy(h_keys_temp, d_keys_temp, MAX_CAPACITY * sizeof(K), - cudaMemcpyDefault)); + cudaMemcpyDeviceToHost)); CUDA_CHECK(cudaMemcpy(h_metas_temp, d_metas_temp, - MAX_CAPACITY * sizeof(M), cudaMemcpyDefault)); + MAX_CAPACITY * sizeof(M), cudaMemcpyDeviceToHost)); CUDA_CHECK(cudaMemcpy(h_vectors_temp, d_vectors_temp, MAX_CAPACITY * sizeof(V) * options.dim, - cudaMemcpyDefault)); + cudaMemcpyDeviceToHost)); size_t bigger_meta_counter = 0; K max_key = 0; - + size_t values_error_counter = 0; for (int i = 0; i < dump_counter; i++) { ASSERT_EQ(h_keys_temp[i], h_metas_temp[i]); max_key = std::max(max_key, h_keys_temp[i]); if (h_metas_temp[i] >= expected_min_key) bigger_meta_counter++; for (int j = 0; j < options.dim; j++) { - ASSERT_EQ(h_vectors_temp[i * options.dim + j], - static_cast(h_keys_temp[i] * 0.00001)); + if (h_vectors_temp[i * options.dim + j] != + static_cast(h_keys_temp[i] * 0.00001)) { + values_error_counter++; + } } } + ASSERT_EQ(values_error_counter, 0); float correct_rate = (bigger_meta_counter * 1.0) / MAX_CAPACITY; std::cout << std::setprecision(3) << "[Round " << r << "]" << "correct_rate=" << correct_rate << std::endl; @@ -1842,14 +1836,6 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) { } CUDA_CHECK(cudaStreamDestroy(stream)); - CUDA_CHECK(cudaFreeHost(h_keys_base)); - CUDA_CHECK(cudaFreeHost(h_metas_base)); - CUDA_CHECK(cudaFreeHost(h_vectors_base)); - - CUDA_CHECK(cudaFreeHost(h_keys_temp)); - CUDA_CHECK(cudaFreeHost(h_metas_temp)); - CUDA_CHECK(cudaFreeHost(h_vectors_temp)); - CUDA_CHECK(cudaFree(d_keys_temp)); CUDA_CHECK(cudaFree(d_metas_temp)); CUDA_CHECK(cudaFree(d_vectors_temp)); @@ -2274,8 +2260,8 @@ void CheckFindOrInsertValues(Table* table, K* keys, V* values, M* metas, } } ASSERT_EQ(value_diff_cnt, 0); - std::cout << "Check find_or_insert behavior got value_diff_cnt: " - << value_diff_cnt + std::cout << "Check find_or_insert behavior got " + << "value_diff_cnt: " << value_diff_cnt << ", while table_size_before: " << table_size_before << ", while table_size_after: " << table_size_after << ", while len: " << len << std::endl; diff --git a/tests/insert_and_evict_test.cc.cu b/tests/insert_and_evict_test.cc.cu index 58dac30c8..f2b9e17d3 100644 --- a/tests/insert_and_evict_test.cc.cu +++ b/tests/insert_and_evict_test.cc.cu @@ -266,6 +266,13 @@ void CheckInsertAndEvict(Table* table, K* keys, V* values, M* metas, } } } + std::cout << "Check insert_and_evict behavior got " + << "key_miss_cnt: " << key_miss_cnt + << ", value_diff_cnt: " << value_diff_cnt + << ", while table_size_before: " << table_size_before + << ", while table_size_after: " << table_size_after + << ", while len: " << len << std::endl; + ASSERT_EQ(key_miss_cnt, 0); ASSERT_EQ(value_diff_cnt, 0); diff --git a/tests/merlin_hashtable_test.cc.cu b/tests/merlin_hashtable_test.cc.cu index 0b81af7be..56948a57a 100644 --- a/tests/merlin_hashtable_test.cc.cu +++ b/tests/merlin_hashtable_test.cc.cu @@ -1725,28 +1725,19 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) { options.max_hbm_for_vectors = nv::merlin::GB(max_hbm_for_vectors); options.evict_strategy = nv::merlin::EvictStrategy::kCustomized; - K* h_keys_base; - M* h_metas_base; - V* h_vectors_base; + K* h_keys_base = test_util::HostBuffer(BATCH_SIZE).ptr(); + M* h_metas_base = test_util::HostBuffer(BATCH_SIZE).ptr(); + V* h_vectors_base = test_util::HostBuffer(BATCH_SIZE * options.dim).ptr(); - K* h_keys_temp; - M* h_metas_temp; - V* h_vectors_temp; + K* h_keys_temp = test_util::HostBuffer(MAX_CAPACITY).ptr(); + M* h_metas_temp = test_util::HostBuffer(MAX_CAPACITY).ptr(); + V* h_vectors_temp = + test_util::HostBuffer(MAX_CAPACITY * options.dim).ptr(); K* d_keys_temp; M* d_metas_temp = nullptr; V* d_vectors_temp; - CUDA_CHECK(cudaMallocHost(&h_keys_base, BATCH_SIZE * sizeof(K))); - CUDA_CHECK(cudaMallocHost(&h_metas_base, BATCH_SIZE * sizeof(M))); - CUDA_CHECK( - cudaMallocHost(&h_vectors_base, BATCH_SIZE * sizeof(V) * options.dim)); - - CUDA_CHECK(cudaMallocHost(&h_keys_temp, MAX_CAPACITY * sizeof(K))); - CUDA_CHECK(cudaMallocHost(&h_metas_temp, MAX_CAPACITY * sizeof(M))); - CUDA_CHECK( - cudaMallocHost(&h_vectors_temp, MAX_CAPACITY * sizeof(V) * options.dim)); - CUDA_CHECK(cudaMalloc(&d_keys_temp, MAX_CAPACITY * sizeof(K))); CUDA_CHECK(cudaMalloc(&d_metas_temp, MAX_CAPACITY * sizeof(M))); CUDA_CHECK( @@ -1828,14 +1819,6 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) { } CUDA_CHECK(cudaStreamDestroy(stream)); - CUDA_CHECK(cudaFreeHost(h_keys_base)); - CUDA_CHECK(cudaFreeHost(h_metas_base)); - CUDA_CHECK(cudaFreeHost(h_vectors_base)); - - CUDA_CHECK(cudaFreeHost(h_keys_temp)); - CUDA_CHECK(cudaFreeHost(h_metas_temp)); - CUDA_CHECK(cudaFreeHost(h_vectors_temp)); - CUDA_CHECK(cudaFree(d_keys_temp)); CUDA_CHECK(cudaFree(d_metas_temp)); CUDA_CHECK(cudaFree(d_vectors_temp)); diff --git a/tests/test_util.cuh b/tests/test_util.cuh index 07d79ed0c..545299d92 100644 --- a/tests/test_util.cuh +++ b/tests/test_util.cuh @@ -502,4 +502,38 @@ std::array range(const T start) { } return result; } + +template +class HostBuffer { + public: + HostBuffer(const size_t size = 1) : ptr_(nullptr) { + if (!ptr_) { + size_ = size; + ptr_ = reinterpret_cast(malloc(sizeof(T) * size_)); + } + } + ~HostBuffer() { + try { + if (!ptr_) free(ptr_); + } catch (const nv::merlin::CudaException& e) { + cerr << "[HierarchicalKV] Failed to free HostBuffer!" << endl; + } + } + + __inline__ T* alloc_or_reuse(const size_t size = 0) { + if (size > size_) { + free(ptr_); + size_ = size; + reinterpret_cast(malloc(sizeof(T) * size_)); + } + return ptr_; + } + + __inline__ T* ptr() { return ptr_; } + + private: + T* ptr_; + size_t size_; +}; + } // namespace test_util