Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhangHuiGui committed Mar 28, 2024
1 parent 9492660 commit b13eeb5
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 23 deletions.
8 changes: 4 additions & 4 deletions cpp/src/arrow/compute/key_hash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -389,9 +389,9 @@ Status Hashing32::HashMultiColumn(const std::vector<KeyColumnArray>& cols,
// pre calculate alloc size in TempVectorStack for hash_temp_buf, null_hash_temp_buf
// and null_indices_buf
const auto alloc_hash_temp_buf =
util::TempVectorStack::EstimateAllocationSize(max_batch_size * sizeof(uint32_t));
util::TempVectorStack::EstimatedAllocationSize(max_batch_size * sizeof(uint32_t));
const auto alloc_for_null_indices_buf =
util::TempVectorStack::EstimateAllocationSize(max_batch_size * sizeof(uint16_t));
util::TempVectorStack::EstimatedAllocationSize(max_batch_size * sizeof(uint16_t));
const auto alloc_size = alloc_hash_temp_buf * 2 + alloc_for_null_indices_buf;

std::unique_ptr<util::TempVectorStack> temp_stack(nullptr);
Expand Down Expand Up @@ -851,9 +851,9 @@ Status Hashing64::HashMultiColumn(const std::vector<KeyColumnArray>& cols,

// pre calculate alloc size in TempVectorStack for null_indices_buf, null_hash_temp_buf
const auto alloc_for_null_hash_temp_buf =
util::TempVectorStack::EstimateAllocationSize(max_batch_size * sizeof(uint64_t));
util::TempVectorStack::EstimatedAllocationSize(max_batch_size * sizeof(uint64_t));
const auto alloc_for_null_indices_buf =
util::TempVectorStack::EstimateAllocationSize(max_batch_size * sizeof(uint16_t));
util::TempVectorStack::EstimatedAllocationSize(max_batch_size * sizeof(uint16_t));
const auto alloc_size = alloc_for_null_hash_temp_buf + alloc_for_null_indices_buf;

std::unique_ptr<util::TempVectorStack> temp_stack(nullptr);
Expand Down
28 changes: 14 additions & 14 deletions cpp/src/arrow/compute/key_hash_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -319,42 +319,42 @@ TEST(HashBatch, AllocTempStackAsNeeded) {
std::vector<arrow::compute::KeyColumnArray> temp_column_arrays;

// HashBatch using internally allocated buffer.
std::vector<uint32_t> hashes32(batch_size);
std::vector<uint64_t> hashes64(batch_size);
std::vector<uint32_t> internal_allocated_hashes32(batch_size);
std::vector<uint64_t> internal_allocated_hashes64(batch_size);
ASSERT_OK(arrow::compute::Hashing32::HashBatch(
exec_batch, hashes32.data(), temp_column_arrays, ctx->cpu_info()->hardware_flags(),
nullptr, 0, batch_size));
exec_batch, internal_allocated_hashes32.data(), temp_column_arrays,
ctx->cpu_info()->hardware_flags(), nullptr, 0, batch_size));
ASSERT_OK(arrow::compute::Hashing64::HashBatch(
exec_batch, hashes64.data(), temp_column_arrays, ctx->cpu_info()->hardware_flags(),
nullptr, 0, batch_size));
exec_batch, internal_allocated_hashes64.data(), temp_column_arrays,
ctx->cpu_info()->hardware_flags(), nullptr, 0, batch_size));

util::TempVectorStack hash32_stack, hash64_stack;
std::vector<uint32_t> new_hashes32(batch_size);
std::vector<uint64_t> new_hashes64(batch_size);
std::vector<uint32_t> pre_allocated_hashes32(batch_size);
std::vector<uint64_t> pre_allocated_hashes64(batch_size);

// HashBatch using pre-allocated buffer of insufficient size raises stack overflow.
ASSERT_OK(hash32_stack.Init(default_memory_pool(), batch_size));
ASSERT_NOT_OK(arrow::compute::Hashing32::HashBatch(
exec_batch, new_hashes32.data(), temp_column_arrays,
exec_batch, pre_allocated_hashes32.data(), temp_column_arrays,
ctx->cpu_info()->hardware_flags(), &hash32_stack, 0, batch_size));
ASSERT_OK(hash64_stack.Init(default_memory_pool(), batch_size));
ASSERT_NOT_OK(arrow::compute::Hashing64::HashBatch(
exec_batch, new_hashes64.data(), temp_column_arrays,
exec_batch, pre_allocated_hashes64.data(), temp_column_arrays,
ctx->cpu_info()->hardware_flags(), &hash64_stack, 0, batch_size));

// HashBatch using big enough pre-allocated buffer.
ASSERT_OK(hash32_stack.Init(default_memory_pool(), 1024));
ASSERT_OK(arrow::compute::Hashing32::HashBatch(
exec_batch, new_hashes32.data(), temp_column_arrays,
exec_batch, pre_allocated_hashes32.data(), temp_column_arrays,
ctx->cpu_info()->hardware_flags(), &hash32_stack, 0, batch_size));
ASSERT_OK(hash64_stack.Init(default_memory_pool(), 1024));
ASSERT_OK(arrow::compute::Hashing64::HashBatch(
exec_batch, new_hashes64.data(), temp_column_arrays,
exec_batch, pre_allocated_hashes64.data(), temp_column_arrays,
ctx->cpu_info()->hardware_flags(), &hash64_stack, 0, batch_size));

for (int i = 0; i < batch_size; i++) {
EXPECT_EQ(hashes32[i], new_hashes32[i]);
EXPECT_EQ(hashes64[i], new_hashes64[i]);
EXPECT_EQ(internal_allocated_hashes32[i], pre_allocated_hashes32[i]);
EXPECT_EQ(internal_allocated_hashes64[i], pre_allocated_hashes64[i]);
}
}

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/compute/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ using internal::CpuInfo;
namespace util {

void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
const auto estimate_size = EstimateAllocationSize(num_bytes);
const auto estimated_size = EstimatedAllocationSize(num_bytes);
// XXX cannot return a regular Status because most consumers do not either.
ARROW_CHECK_OK(CheckAllocationOverflow(estimate_size));
int64_t new_top = top_ + estimate_size;
ARROW_CHECK_OK(CheckAllocationOverflow(estimated_size));
int64_t new_top = top_ + estimated_size;
*data = buffer_->mutable_data() + top_ + sizeof(uint64_t);
// We set 8 bytes before the beginning of the allocated range and
// 8 bytes after the end to check for stack overflow (which would
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,15 @@ class ARROW_EXPORT TempVectorStack {
Status Init(MemoryPool* pool, int64_t size) {
num_vectors_ = 0;
top_ = 0;
buffer_size_ = EstimateAllocationSize(size);
buffer_size_ = EstimatedAllocationSize(size);
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
// Ensure later operations don't accidentally read uninitialized memory.
std::memset(buffer->mutable_data(), 0xFF, size);
buffer_ = std::move(buffer);
return Status::OK();
}

static int64_t EstimateAllocationSize(int64_t size) {
static int64_t EstimatedAllocationSize(int64_t size) {
return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
}

Expand Down

0 comments on commit b13eeb5

Please sign in to comment.