diff --git a/tests/find_or_insert_test.cc.cu b/tests/find_or_insert_test.cc.cu
index c87e22912..362cea2e9 100644
--- a/tests/find_or_insert_test.cc.cu
+++ b/tests/find_or_insert_test.cc.cu
@@ -1737,28 +1737,19 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) {
   options.max_hbm_for_vectors = nv::merlin::GB(max_hbm_for_vectors);
   options.evict_strategy = nv::merlin::EvictStrategy::kCustomized;
 
-  K* h_keys_base;
-  M* h_metas_base;
-  V* h_vectors_base;
+  K* h_keys_base = test_util::HostBuffer<K>(BATCH_SIZE).ptr();
+  M* h_metas_base = test_util::HostBuffer<M>(BATCH_SIZE).ptr();
+  V* h_vectors_base = test_util::HostBuffer<V>(BATCH_SIZE * options.dim).ptr();
 
-  K* h_keys_temp;
-  M* h_metas_temp;
-  V* h_vectors_temp;
+  K* h_keys_temp = test_util::HostBuffer<K>(MAX_CAPACITY).ptr();
+  M* h_metas_temp = test_util::HostBuffer<M>(MAX_CAPACITY).ptr();
+  V* h_vectors_temp =
+      test_util::HostBuffer<V>(MAX_CAPACITY * options.dim).ptr();
 
   K* d_keys_temp;
   M* d_metas_temp = nullptr;
   V* d_vectors_temp;
 
-  CUDA_CHECK(cudaMallocHost(&h_keys_base, BATCH_SIZE * sizeof(K)));
-  CUDA_CHECK(cudaMallocHost(&h_metas_base, BATCH_SIZE * sizeof(M)));
-  CUDA_CHECK(
-      cudaMallocHost(&h_vectors_base, BATCH_SIZE * sizeof(V) * options.dim));
-
-  CUDA_CHECK(cudaMallocHost(&h_keys_temp, MAX_CAPACITY * sizeof(K)));
-  CUDA_CHECK(cudaMallocHost(&h_metas_temp, MAX_CAPACITY * sizeof(M)));
-  CUDA_CHECK(
-      cudaMallocHost(&h_vectors_temp, MAX_CAPACITY * sizeof(V) * options.dim));
-
   CUDA_CHECK(cudaMalloc(&d_keys_temp, MAX_CAPACITY * sizeof(K)));
   CUDA_CHECK(cudaMalloc(&d_metas_temp, MAX_CAPACITY * sizeof(M)));
   CUDA_CHECK(
@@ -1813,26 +1804,29 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) {
           MAX_CAPACITY, 0, d_keys_temp, d_vectors_temp, d_metas_temp, stream);
 
       CUDA_CHECK(cudaMemcpy(h_keys_temp, d_keys_temp, MAX_CAPACITY * sizeof(K),
-                            cudaMemcpyDefault));
+                            cudaMemcpyDeviceToHost));
       CUDA_CHECK(cudaMemcpy(h_metas_temp, d_metas_temp,
-                            MAX_CAPACITY * sizeof(M), cudaMemcpyDefault));
+                            MAX_CAPACITY * sizeof(M), cudaMemcpyDeviceToHost));
       CUDA_CHECK(cudaMemcpy(h_vectors_temp, d_vectors_temp,
                             MAX_CAPACITY * sizeof(V) * options.dim,
-                            cudaMemcpyDefault));
+                            cudaMemcpyDeviceToHost));
 
       size_t bigger_meta_counter = 0;
       K max_key = 0;
-
+      size_t values_error_counter = 0;
       for (int i = 0; i < dump_counter; i++) {
         ASSERT_EQ(h_keys_temp[i], h_metas_temp[i]);
         max_key = std::max(max_key, h_keys_temp[i]);
         if (h_metas_temp[i] >= expected_min_key) bigger_meta_counter++;
         for (int j = 0; j < options.dim; j++) {
-          ASSERT_EQ(h_vectors_temp[i * options.dim + j],
-                    static_cast<float>(h_keys_temp[i] * 0.00001));
+          if (h_vectors_temp[i * options.dim + j] !=
+              static_cast<float>(h_keys_temp[i] * 0.00001)) {
+            values_error_counter++;
+          }
         }
       }
 
+      ASSERT_EQ(values_error_counter, 0);
       float correct_rate = (bigger_meta_counter * 1.0) / MAX_CAPACITY;
       std::cout << std::setprecision(3) << "[Round " << r << "]"
                 << "correct_rate=" << correct_rate << std::endl;
@@ -1842,14 +1836,6 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) {
   }
   CUDA_CHECK(cudaStreamDestroy(stream));
 
-  CUDA_CHECK(cudaFreeHost(h_keys_base));
-  CUDA_CHECK(cudaFreeHost(h_metas_base));
-  CUDA_CHECK(cudaFreeHost(h_vectors_base));
-
-  CUDA_CHECK(cudaFreeHost(h_keys_temp));
-  CUDA_CHECK(cudaFreeHost(h_metas_temp));
-  CUDA_CHECK(cudaFreeHost(h_vectors_temp));
-
   CUDA_CHECK(cudaFree(d_keys_temp));
   CUDA_CHECK(cudaFree(d_metas_temp));
   CUDA_CHECK(cudaFree(d_vectors_temp));
@@ -2274,8 +2260,8 @@ void CheckFindOrInsertValues(Table* table, K* keys, V* values, M* metas,
     }
   }
   ASSERT_EQ(value_diff_cnt, 0);
-  std::cout << "Check find_or_insert behavior got value_diff_cnt: "
-            << value_diff_cnt
+  std::cout << "Check find_or_insert behavior got "
+            << "value_diff_cnt: " << value_diff_cnt
             << ", while table_size_before: " << table_size_before
             << ", while table_size_after: " << table_size_after
             << ", while len: " << len << std::endl;
diff --git a/tests/insert_and_evict_test.cc.cu b/tests/insert_and_evict_test.cc.cu
index 58dac30c8..f2b9e17d3 100644
--- a/tests/insert_and_evict_test.cc.cu
+++ b/tests/insert_and_evict_test.cc.cu
@@ -266,6 +266,13 @@ void CheckInsertAndEvict(Table* table, K* keys, V* values, M* metas,
       }
     }
   }
+  std::cout << "Check insert_and_evict behavior got "
+            << "key_miss_cnt: " << key_miss_cnt
+            << ", value_diff_cnt: " << value_diff_cnt
+            << ", while table_size_before: " << table_size_before
+            << ", while table_size_after: " << table_size_after
+            << ", while len: " << len << std::endl;
+
   ASSERT_EQ(key_miss_cnt, 0);
   ASSERT_EQ(value_diff_cnt, 0);
 
diff --git a/tests/merlin_hashtable_test.cc.cu b/tests/merlin_hashtable_test.cc.cu
index 0b81af7be..56948a57a 100644
--- a/tests/merlin_hashtable_test.cc.cu
+++ b/tests/merlin_hashtable_test.cc.cu
@@ -1725,28 +1725,19 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) {
   options.max_hbm_for_vectors = nv::merlin::GB(max_hbm_for_vectors);
   options.evict_strategy = nv::merlin::EvictStrategy::kCustomized;
 
-  K* h_keys_base;
-  M* h_metas_base;
-  V* h_vectors_base;
+  K* h_keys_base = test_util::HostBuffer<K>(BATCH_SIZE).ptr();
+  M* h_metas_base = test_util::HostBuffer<M>(BATCH_SIZE).ptr();
+  V* h_vectors_base = test_util::HostBuffer<V>(BATCH_SIZE * options.dim).ptr();
 
-  K* h_keys_temp;
-  M* h_metas_temp;
-  V* h_vectors_temp;
+  K* h_keys_temp = test_util::HostBuffer<K>(MAX_CAPACITY).ptr();
+  M* h_metas_temp = test_util::HostBuffer<M>(MAX_CAPACITY).ptr();
+  V* h_vectors_temp =
+      test_util::HostBuffer<V>(MAX_CAPACITY * options.dim).ptr();
 
   K* d_keys_temp;
   M* d_metas_temp = nullptr;
   V* d_vectors_temp;
 
-  CUDA_CHECK(cudaMallocHost(&h_keys_base, BATCH_SIZE * sizeof(K)));
-  CUDA_CHECK(cudaMallocHost(&h_metas_base, BATCH_SIZE * sizeof(M)));
-  CUDA_CHECK(
-      cudaMallocHost(&h_vectors_base, BATCH_SIZE * sizeof(V) * options.dim));
-
-  CUDA_CHECK(cudaMallocHost(&h_keys_temp, MAX_CAPACITY * sizeof(K)));
-  CUDA_CHECK(cudaMallocHost(&h_metas_temp, MAX_CAPACITY * sizeof(M)));
-  CUDA_CHECK(
-      cudaMallocHost(&h_vectors_temp, MAX_CAPACITY * sizeof(V) * options.dim));
-
   CUDA_CHECK(cudaMalloc(&d_keys_temp, MAX_CAPACITY * sizeof(K)));
   CUDA_CHECK(cudaMalloc(&d_metas_temp, MAX_CAPACITY * sizeof(M)));
   CUDA_CHECK(
@@ -1828,14 +1819,6 @@ void test_evict_strategy_customized_correct_rate(size_t max_hbm_for_vectors) {
   }
   CUDA_CHECK(cudaStreamDestroy(stream));
 
-  CUDA_CHECK(cudaFreeHost(h_keys_base));
-  CUDA_CHECK(cudaFreeHost(h_metas_base));
-  CUDA_CHECK(cudaFreeHost(h_vectors_base));
-
-  CUDA_CHECK(cudaFreeHost(h_keys_temp));
-  CUDA_CHECK(cudaFreeHost(h_metas_temp));
-  CUDA_CHECK(cudaFreeHost(h_vectors_temp));
-
   CUDA_CHECK(cudaFree(d_keys_temp));
   CUDA_CHECK(cudaFree(d_metas_temp));
   CUDA_CHECK(cudaFree(d_vectors_temp));
diff --git a/tests/test_util.cuh b/tests/test_util.cuh
index 07d79ed0c..545299d92 100644
--- a/tests/test_util.cuh
+++ b/tests/test_util.cuh
@@ -502,4 +502,38 @@ std::array<T, N> range(const T start) {
   }
   return result;
 }
+
+template <class T>
+class HostBuffer {
+ public:
+  HostBuffer(const size_t size = 1) : ptr_(nullptr) {
+    if (!ptr_) {
+      size_ = size;
+      ptr_ = reinterpret_cast<T*>(malloc(sizeof(T) * size_));
+    }
+  }
+  ~HostBuffer() {
+    try {
+      if (!ptr_) free(ptr_);
+    } catch (const nv::merlin::CudaException& e) {
+      cerr << "[HierarchicalKV] Failed to free HostBuffer!" << endl;
+    }
+  }
+
+  __inline__ T* alloc_or_reuse(const size_t size = 0) {
+    if (size > size_) {
+      free(ptr_);
+      size_ = size;
+      reinterpret_cast<T*>(malloc(sizeof(T) * size_));
+    }
+    return ptr_;
+  }
+
+  __inline__ T* ptr() { return ptr_; }
+
+ private:
+  T* ptr_;
+  size_t size_;
+};
+
 }  // namespace test_util