From b13eeb5b2d0fade6ba0f8ed76ca14241aeac2b16 Mon Sep 17 00:00:00 2001 From: ZhangHuiGui Date: Thu, 28 Mar 2024 20:03:45 +0800 Subject: [PATCH] fix --- cpp/src/arrow/compute/key_hash.cc | 8 ++++---- cpp/src/arrow/compute/key_hash_test.cc | 28 +++++++++++++------------- cpp/src/arrow/compute/util.cc | 6 +++--- cpp/src/arrow/compute/util.h | 4 ++-- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/cpp/src/arrow/compute/key_hash.cc b/cpp/src/arrow/compute/key_hash.cc index bcd4c4fc0fb05..e375ae455a8e3 100644 --- a/cpp/src/arrow/compute/key_hash.cc +++ b/cpp/src/arrow/compute/key_hash.cc @@ -389,9 +389,9 @@ Status Hashing32::HashMultiColumn(const std::vector& cols, // pre calculate alloc size in TempVectorStack for hash_temp_buf, null_hash_temp_buf // and null_indices_buf const auto alloc_hash_temp_buf = - util::TempVectorStack::EstimateAllocationSize(max_batch_size * sizeof(uint32_t)); + util::TempVectorStack::EstimatedAllocationSize(max_batch_size * sizeof(uint32_t)); const auto alloc_for_null_indices_buf = - util::TempVectorStack::EstimateAllocationSize(max_batch_size * sizeof(uint16_t)); + util::TempVectorStack::EstimatedAllocationSize(max_batch_size * sizeof(uint16_t)); const auto alloc_size = alloc_hash_temp_buf * 2 + alloc_for_null_indices_buf; std::unique_ptr temp_stack(nullptr); @@ -851,9 +851,9 @@ Status Hashing64::HashMultiColumn(const std::vector& cols, // pre calculate alloc size in TempVectorStack for null_indices_buf, null_hash_temp_buf const auto alloc_for_null_hash_temp_buf = - util::TempVectorStack::EstimateAllocationSize(max_batch_size * sizeof(uint64_t)); + util::TempVectorStack::EstimatedAllocationSize(max_batch_size * sizeof(uint64_t)); const auto alloc_for_null_indices_buf = - util::TempVectorStack::EstimateAllocationSize(max_batch_size * sizeof(uint16_t)); + util::TempVectorStack::EstimatedAllocationSize(max_batch_size * sizeof(uint16_t)); const auto alloc_size = alloc_for_null_hash_temp_buf + alloc_for_null_indices_buf; std::unique_ptr temp_stack(nullptr); diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index ae03bc27a234c..345d805bdff7f 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -319,42 +319,42 @@ TEST(HashBatch, AllocTempStackAsNeeded) { std::vector temp_column_arrays; // HashBatch using internally allocated buffer. - std::vector hashes32(batch_size); - std::vector hashes64(batch_size); + std::vector internal_allocated_hashes32(batch_size); + std::vector internal_allocated_hashes64(batch_size); ASSERT_OK(arrow::compute::Hashing32::HashBatch( - exec_batch, hashes32.data(), temp_column_arrays, ctx->cpu_info()->hardware_flags(), - nullptr, 0, batch_size)); + exec_batch, internal_allocated_hashes32.data(), temp_column_arrays, + ctx->cpu_info()->hardware_flags(), nullptr, 0, batch_size)); ASSERT_OK(arrow::compute::Hashing64::HashBatch( - exec_batch, hashes64.data(), temp_column_arrays, ctx->cpu_info()->hardware_flags(), - nullptr, 0, batch_size)); + exec_batch, internal_allocated_hashes64.data(), temp_column_arrays, + ctx->cpu_info()->hardware_flags(), nullptr, 0, batch_size)); util::TempVectorStack hash32_stack, hash64_stack; - std::vector new_hashes32(batch_size); - std::vector new_hashes64(batch_size); + std::vector pre_allocated_hashes32(batch_size); + std::vector pre_allocated_hashes64(batch_size); // HashBatch using pre-allocated buffer of insufficient size raises stack overflow. ASSERT_OK(hash32_stack.Init(default_memory_pool(), batch_size)); ASSERT_NOT_OK(arrow::compute::Hashing32::HashBatch( - exec_batch, new_hashes32.data(), temp_column_arrays, + exec_batch, pre_allocated_hashes32.data(), temp_column_arrays, ctx->cpu_info()->hardware_flags(), &hash32_stack, 0, batch_size)); ASSERT_OK(hash64_stack.Init(default_memory_pool(), batch_size)); ASSERT_NOT_OK(arrow::compute::Hashing64::HashBatch( - exec_batch, new_hashes64.data(), temp_column_arrays, + exec_batch, pre_allocated_hashes64.data(), temp_column_arrays, ctx->cpu_info()->hardware_flags(), &hash64_stack, 0, batch_size)); // HashBatch using big enough pre-allocated buffer. ASSERT_OK(hash32_stack.Init(default_memory_pool(), 1024)); ASSERT_OK(arrow::compute::Hashing32::HashBatch( - exec_batch, new_hashes32.data(), temp_column_arrays, + exec_batch, pre_allocated_hashes32.data(), temp_column_arrays, ctx->cpu_info()->hardware_flags(), &hash32_stack, 0, batch_size)); ASSERT_OK(hash64_stack.Init(default_memory_pool(), 1024)); ASSERT_OK(arrow::compute::Hashing64::HashBatch( - exec_batch, new_hashes64.data(), temp_column_arrays, + exec_batch, pre_allocated_hashes64.data(), temp_column_arrays, ctx->cpu_info()->hardware_flags(), &hash64_stack, 0, batch_size)); for (int i = 0; i < batch_size; i++) { - EXPECT_EQ(hashes32[i], new_hashes32[i]); - EXPECT_EQ(hashes64[i], new_hashes64[i]); + EXPECT_EQ(internal_allocated_hashes32[i], pre_allocated_hashes32[i]); + EXPECT_EQ(internal_allocated_hashes64[i], pre_allocated_hashes64[i]); } } diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index df9aea701e48a..f61959aa6d678 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -32,10 +32,10 @@ using internal::CpuInfo; namespace util { void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { - const auto estimate_size = EstimateAllocationSize(num_bytes); + const auto estimated_size = EstimatedAllocationSize(num_bytes); // XXX cannot return a regular Status because most consumers do not either. - ARROW_CHECK_OK(CheckAllocationOverflow(estimate_size)); - int64_t new_top = top_ + estimate_size; + ARROW_CHECK_OK(CheckAllocationOverflow(estimated_size)); + int64_t new_top = top_ + estimated_size; *data = buffer_->mutable_data() + top_ + sizeof(uint64_t); // We set 8 bytes before the beginning of the allocated range and // 8 bytes after the end to check for stack overflow (which would diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 9817b49ecee50..4d67eeba829cd 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -89,7 +89,7 @@ class ARROW_EXPORT TempVectorStack { Status Init(MemoryPool* pool, int64_t size) { num_vectors_ = 0; top_ = 0; - buffer_size_ = EstimateAllocationSize(size); + buffer_size_ = EstimatedAllocationSize(size); ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); // Ensure later operations don't accidentally read uninitialized memory. std::memset(buffer->mutable_data(), 0xFF, size); @@ -97,7 +97,7 @@ class ARROW_EXPORT TempVectorStack { return Status::OK(); } - static int64_t EstimateAllocationSize(int64_t size) { + static int64_t EstimatedAllocationSize(int64_t size) { return PaddedAllocationSize(size) + 2 * sizeof(uint64_t); }