From 12f68fca055c6301947fa29c72cda13a9360e054 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Tue, 16 Jul 2024 20:37:05 +0800 Subject: [PATCH] GH-43209: [C++] Add lint for DCHECK in public headers (#43248) ### Rationale for this change I raised my question in #43209 about which I have always been curious. The top answer makes a good sense and after some searching in the code base, I found more evidence like: https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/public_api_test.cc#L67-L71 So I'm making the following changes to `DCHECK` macro family. ### What changes are included in this PR? 1. Add lint rule for `DCHECK` usage in public headers; 2. Cleanup exisiting `DCHECK` usages in public (probably non-public, but at least not named with `internal`) headers; 3. Add `ifdef` protection for `DCHECK` definition like we did for `ASSIGN_OR_RAISE` (https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/result_internal.h#L20-L22) and `RETURN_NOT_OK` (https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/status.h#L80-L82), to not mess up user code that directly includes `arrow/util/logging.h`. 4. Add comments as guideline. ### Are these changes tested? No test needed. ### Are there any user-facing changes? Probably not? * GitHub Issue: #43209 Authored-by: Ruoxi Sun Signed-off-by: Antoine Pitrou --- cpp/build-support/lint_cpp_cli.py | 11 +++++------ cpp/src/arrow/acero/asof_join_node.cc | 2 +- cpp/src/arrow/acero/sorted_merge_node.cc | 2 +- ..._table.h => unmaterialized_table_internal.h} | 0 cpp/src/arrow/acero/util.h | 2 +- ...ream_utils.h => bit_stream_utils_internal.h} | 0 cpp/src/arrow/util/bit_util_test.cc | 2 +- cpp/src/arrow/util/logging.h | 17 +++++++++++++++++ .../{rle_encoding.h => rle_encoding_internal.h} | 2 +- cpp/src/arrow/util/rle_encoding_test.cc | 4 ++-- cpp/src/arrow/util/tdigest.h | 2 +- cpp/src/arrow/util/vector.h | 8 ++++---- cpp/src/gandiva/dex_visitor.h | 2 +- cpp/src/gandiva/engine.h | 2 +- cpp/src/gandiva/eval_batch.h | 12 ++++++------ cpp/src/gandiva/llvm_types.h | 2 +- cpp/src/gandiva/local_bitmaps_holder.h | 2 +- cpp/src/gandiva/selection_vector_impl.h | 2 +- cpp/src/parquet/bloom_filter.h | 4 ++-- cpp/src/parquet/column_reader.cc | 4 ++-- cpp/src/parquet/column_writer.cc | 4 ++-- cpp/src/parquet/encoding.cc | 4 ++-- cpp/src/parquet/level_conversion_inc.h | 2 +- 23 files changed, 54 insertions(+), 38 deletions(-) rename cpp/src/arrow/acero/{unmaterialized_table.h => unmaterialized_table_internal.h} (100%) rename cpp/src/arrow/util/{bit_stream_utils.h => bit_stream_utils_internal.h} (100%) rename cpp/src/arrow/util/{rle_encoding.h => rle_encoding_internal.h} (99%) diff --git a/cpp/build-support/lint_cpp_cli.py b/cpp/build-support/lint_cpp_cli.py index a0eb8f0efe6d5..47abd53fe925d 100755 --- a/cpp/build-support/lint_cpp_cli.py +++ b/cpp/build-support/lint_cpp_cli.py @@ -31,6 +31,7 @@ _NULLPTR_REGEX = re.compile(r'.*\bnullptr\b.*') _RETURN_NOT_OK_REGEX = re.compile(r'.*\sRETURN_NOT_OK.*') _ASSIGN_OR_RAISE_REGEX = re.compile(r'.*\sASSIGN_OR_RAISE.*') +_DCHECK_REGEX = re.compile(r'.*\sDCHECK.*') def _paths(paths): @@ -54,14 +55,12 @@ def lint_file(path): (lambda x: re.match(_RETURN_NOT_OK_REGEX, x), 'Use ARROW_RETURN_NOT_OK in header files', _paths('''\ arrow/status.h - test - arrow/util/hash.h arrow/python/util''')), (lambda x: re.match(_ASSIGN_OR_RAISE_REGEX, x), - 'Use ARROW_ASSIGN_OR_RAISE in header files', _paths('''\ - arrow/result_internal.h - test - ''')) + 'Use ARROW_ASSIGN_OR_RAISE in header files', []), + (lambda x: re.match(_DCHECK_REGEX, x), + 'Use ARROW_DCHECK in header files', _paths('''\ + arrow/util/logging.h''')) ] diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 848cbdf7506ad..2248362241cd7 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -32,7 +32,7 @@ #include "arrow/acero/exec_plan.h" #include "arrow/acero/options.h" -#include "arrow/acero/unmaterialized_table.h" +#include "arrow/acero/unmaterialized_table_internal.h" #ifndef NDEBUG #include "arrow/acero/options_internal.h" #endif diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc b/cpp/src/arrow/acero/sorted_merge_node.cc index a71ac79efcc46..2845383cee982 100644 --- a/cpp/src/arrow/acero/sorted_merge_node.cc +++ b/cpp/src/arrow/acero/sorted_merge_node.cc @@ -28,7 +28,7 @@ #include "arrow/acero/options.h" #include "arrow/acero/query_context.h" #include "arrow/acero/time_series_util.h" -#include "arrow/acero/unmaterialized_table.h" +#include "arrow/acero/unmaterialized_table_internal.h" #include "arrow/acero/util.h" #include "arrow/array/builder_base.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/acero/unmaterialized_table.h b/cpp/src/arrow/acero/unmaterialized_table_internal.h similarity index 100% rename from cpp/src/arrow/acero/unmaterialized_table.h rename to cpp/src/arrow/acero/unmaterialized_table_internal.h diff --git a/cpp/src/arrow/acero/util.h b/cpp/src/arrow/acero/util.h index 0eb9f4c87e180..ee46e8527422a 100644 --- a/cpp/src/arrow/acero/util.h +++ b/cpp/src/arrow/acero/util.h @@ -65,7 +65,7 @@ class ARROW_ACERO_EXPORT AtomicCounter { // return true if the counter is complete bool Increment() { - DCHECK_NE(count_.load(), total_.load()); + ARROW_DCHECK_NE(count_.load(), total_.load()); int count = count_.fetch_add(1) + 1; if (count != total_.load()) return false; return DoneOnce(); diff --git a/cpp/src/arrow/util/bit_stream_utils.h b/cpp/src/arrow/util/bit_stream_utils_internal.h similarity index 100% rename from cpp/src/arrow/util/bit_stream_utils.h rename to cpp/src/arrow/util/bit_stream_utils_internal.h diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index e026dfec24065..c7674af57f167 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -43,7 +43,7 @@ #include "arrow/testing/util.h" #include "arrow/type_fwd.h" #include "arrow/util/bit_run_reader.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bitmap.h" #include "arrow/util/bitmap_generate.h" #include "arrow/util/bitmap_ops.h" diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h index 2a2175ec0fc72..be73c020c07f8 100644 --- a/cpp/src/arrow/util/logging.h +++ b/cpp/src/arrow/util/logging.h @@ -138,14 +138,31 @@ enum class ArrowLogLevel : int { #endif // NDEBUG +// These are internal-use macros and should not be used in public headers. +#ifndef DCHECK #define DCHECK ARROW_DCHECK +#endif +#ifndef DCHECK_OK #define DCHECK_OK ARROW_DCHECK_OK +#endif +#ifndef DCHECK_EQ #define DCHECK_EQ ARROW_DCHECK_EQ +#endif +#ifndef DCHECK_NE #define DCHECK_NE ARROW_DCHECK_NE +#endif +#ifndef DCHECK_LE #define DCHECK_LE ARROW_DCHECK_LE +#endif +#ifndef DCHECK_LT #define DCHECK_LT ARROW_DCHECK_LT +#endif +#ifndef DCHECK_GE #define DCHECK_GE ARROW_DCHECK_GE +#endif +#ifndef DCHECK_GT #define DCHECK_GT ARROW_DCHECK_GT +#endif // This code is adapted from // https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h. diff --git a/cpp/src/arrow/util/rle_encoding.h b/cpp/src/arrow/util/rle_encoding_internal.h similarity index 99% rename from cpp/src/arrow/util/rle_encoding.h rename to cpp/src/arrow/util/rle_encoding_internal.h index e0f5690062a04..4575320659706 100644 --- a/cpp/src/arrow/util/rle_encoding.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -27,7 +27,7 @@ #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 26984e5f7735d..0cc0a276a25f4 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -28,10 +28,10 @@ #include "arrow/buffer.h" #include "arrow/testing/random.h" #include "arrow/type.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/io_util.h" -#include "arrow/util/rle_encoding.h" +#include "arrow/util/rle_encoding_internal.h" namespace arrow { namespace util { diff --git a/cpp/src/arrow/util/tdigest.h b/cpp/src/arrow/util/tdigest.h index 308df468840eb..ea033ed696d1b 100644 --- a/cpp/src/arrow/util/tdigest.h +++ b/cpp/src/arrow/util/tdigest.h @@ -56,7 +56,7 @@ class ARROW_EXPORT TDigest { // this function is intensively called and performance critical // call it only if you are sure no NAN exists in input data void Add(double value) { - DCHECK(!std::isnan(value)) << "cannot add NAN"; + ARROW_DCHECK(!std::isnan(value)) << "cannot add NAN"; if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) { MergeInput(); } diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h index 74b6a2403a2bb..e77d713a44d01 100644 --- a/cpp/src/arrow/util/vector.h +++ b/cpp/src/arrow/util/vector.h @@ -31,8 +31,8 @@ namespace internal { template std::vector DeleteVectorElement(const std::vector& values, size_t index) { - DCHECK(!values.empty()); - DCHECK_LT(index, values.size()); + ARROW_DCHECK(!values.empty()); + ARROW_DCHECK_LT(index, values.size()); std::vector out; out.reserve(values.size() - 1); for (size_t i = 0; i < index; ++i) { @@ -47,7 +47,7 @@ std::vector DeleteVectorElement(const std::vector& values, size_t index) { template std::vector AddVectorElement(const std::vector& values, size_t index, T new_element) { - DCHECK_LE(index, values.size()); + ARROW_DCHECK_LE(index, values.size()); std::vector out; out.reserve(values.size() + 1); for (size_t i = 0; i < index; ++i) { @@ -63,7 +63,7 @@ std::vector AddVectorElement(const std::vector& values, size_t index, template std::vector ReplaceVectorElement(const std::vector& values, size_t index, T new_element) { - DCHECK_LE(index, values.size()); + ARROW_DCHECK_LE(index, values.size()); std::vector out; out.reserve(values.size()); for (size_t i = 0; i < index; ++i) { diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h index 5d160bb22ca68..4115df7ffb22b 100644 --- a/cpp/src/gandiva/dex_visitor.h +++ b/cpp/src/gandiva/dex_visitor.h @@ -70,7 +70,7 @@ class GANDIVA_EXPORT DexVisitor { /// Default implementation with only DCHECK(). #define VISIT_DCHECK(DEX_CLASS) \ - void Visit(const DEX_CLASS& dex) override { DCHECK(0); } + void Visit(const DEX_CLASS& dex) override { ARROW_DCHECK(0); } class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor { VISIT_DCHECK(VectorReadValidityDex) diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 565c3f142502d..3a69500e38bcf 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -67,7 +67,7 @@ class GANDIVA_EXPORT Engine { /// Add the function to the list of IR functions that need to be compiled. /// Compiling only the functions that are used by the module saves time. void AddFunctionToCompile(const std::string& fname) { - DCHECK(!module_finalized_); + ARROW_DCHECK(!module_finalized_); functions_to_compile_.push_back(fname); } diff --git a/cpp/src/gandiva/eval_batch.h b/cpp/src/gandiva/eval_batch.h index 9644010b721af..feb4cdc975005 100644 --- a/cpp/src/gandiva/eval_batch.h +++ b/cpp/src/gandiva/eval_batch.h @@ -53,22 +53,22 @@ class EvalBatch { int GetNumBuffers() const { return num_buffers_; } const uint8_t* GetBuffer(int idx) const { - DCHECK(idx <= num_buffers_); + ARROW_DCHECK(idx <= num_buffers_); return (buffers_array_.get())[idx]; } uint8_t* GetBuffer(int idx) { - DCHECK(idx <= num_buffers_); + ARROW_DCHECK(idx <= num_buffers_); return (buffers_array_.get())[idx]; } int64_t GetBufferOffset(int idx) const { - DCHECK(idx <= num_buffers_); + ARROW_DCHECK(idx <= num_buffers_); return (buffer_offsets_array_.get())[idx]; } void SetBuffer(int idx, uint8_t* buffer, int64_t offset) { - DCHECK(idx <= num_buffers_); + ARROW_DCHECK(idx <= num_buffers_); (buffers_array_.get())[idx] = buffer; (buffer_offsets_array_.get())[idx] = offset; } @@ -80,11 +80,11 @@ class EvalBatch { } const uint8_t* GetLocalBitMap(int idx) const { - DCHECK(idx <= GetNumLocalBitMaps()); + ARROW_DCHECK(idx <= GetNumLocalBitMaps()); return local_bitmaps_holder_->GetLocalBitMap(idx); } uint8_t* GetLocalBitMap(int idx) { - DCHECK(idx <= GetNumLocalBitMaps()); + ARROW_DCHECK(idx <= GetNumLocalBitMaps()); return local_bitmaps_holder_->GetLocalBitMap(idx); } diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h index d6f0952713efc..7768a7f7e4bde 100644 --- a/cpp/src/gandiva/llvm_types.h +++ b/cpp/src/gandiva/llvm_types.h @@ -97,7 +97,7 @@ class GANDIVA_EXPORT LLVMTypes { } else if (type->isFloatingPointTy()) { return llvm::ConstantFP::get(type, 0); } else { - DCHECK(type->isPointerTy()); + ARROW_DCHECK(type->isPointerTy()); return llvm::ConstantPointerNull::getNullValue(type); } } diff --git a/cpp/src/gandiva/local_bitmaps_holder.h b/cpp/src/gandiva/local_bitmaps_holder.h index a172fb973c4a5..dc24a32e7cad0 100644 --- a/cpp/src/gandiva/local_bitmaps_holder.h +++ b/cpp/src/gandiva/local_bitmaps_holder.h @@ -40,7 +40,7 @@ class LocalBitMapsHolder { uint8_t** GetLocalBitMapArray() const { return local_bitmaps_array_.get(); } uint8_t* GetLocalBitMap(int idx) const { - DCHECK(idx <= GetNumLocalBitMaps()); + ARROW_DCHECK(idx <= GetNumLocalBitMaps()); return local_bitmaps_array_.get()[idx]; } diff --git a/cpp/src/gandiva/selection_vector_impl.h b/cpp/src/gandiva/selection_vector_impl.h index dc9724ca86fe2..234298daf5748 100644 --- a/cpp/src/gandiva/selection_vector_impl.h +++ b/cpp/src/gandiva/selection_vector_impl.h @@ -60,7 +60,7 @@ class SelectionVectorImpl : public SelectionVector { int64_t GetNumSlots() const override { return num_slots_; } void SetNumSlots(int64_t num_slots) override { - DCHECK_LE(num_slots, max_slots_); + ARROW_DCHECK_LE(num_slots, max_slots_); num_slots_ = num_slots; } diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h index 909563d013fed..82172f363ba7e 100644 --- a/cpp/src/parquet/bloom_filter.h +++ b/cpp/src/parquet/bloom_filter.h @@ -221,7 +221,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { /// kMaximumBloomFilterBytes, and the return value is always a power of 2 static uint32_t OptimalNumOfBytes(uint32_t ndv, double fpp) { uint32_t optimal_num_of_bits = OptimalNumOfBits(ndv, fpp); - DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits)); + ARROW_DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits)); return optimal_num_of_bits >> 3; } @@ -233,7 +233,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { /// @return it always return a value between kMinimumBloomFilterBytes * 8 and /// kMaximumBloomFilterBytes * 8, and the return value is always a power of 16 static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) { - DCHECK(fpp > 0.0 && fpp < 1.0); + ARROW_DCHECK(fpp > 0.0 && fpp < 1.0); const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8)); uint32_t num_bits; diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index ebf9515f27607..05ee6a16c5448 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -36,14 +36,14 @@ #include "arrow/array/builder_primitive.h" #include "arrow/chunked_array.h" #include "arrow/type.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/compression.h" #include "arrow/util/crc32.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" -#include "arrow/util/rle_encoding.h" +#include "arrow/util/rle_encoding_internal.h" #include "arrow/util/unreachable.h" #include "parquet/column_page.h" #include "parquet/encoding.h" diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index c9f6e482981c0..90e0102b422bb 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -32,7 +32,7 @@ #include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/checked_cast.h" @@ -41,7 +41,7 @@ #include "arrow/util/endian.h" #include "arrow/util/float16.h" #include "arrow/util/logging.h" -#include "arrow/util/rle_encoding.h" +#include "arrow/util/rle_encoding_internal.h" #include "arrow/util/type_traits.h" #include "arrow/visit_array_inline.h" #include "parquet/column_page.h" diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 54e1e000040a1..c3f2b79629d9b 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -33,7 +33,7 @@ #include "arrow/type_traits.h" #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/bitmap_writer.h" @@ -42,7 +42,7 @@ #include "arrow/util/hashing.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" -#include "arrow/util/rle_encoding.h" +#include "arrow/util/rle_encoding_internal.h" #include "arrow/util/ubsan.h" #include "arrow/visit_data_inline.h" #include "parquet/exception.h" diff --git a/cpp/src/parquet/level_conversion_inc.h b/cpp/src/parquet/level_conversion_inc.h index d1ccedabfde50..3accb154e6f5a 100644 --- a/cpp/src/parquet/level_conversion_inc.h +++ b/cpp/src/parquet/level_conversion_inc.h @@ -296,7 +296,7 @@ template int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_size, int64_t upper_bound_remaining, LevelInfo level_info, ::arrow::internal::FirstTimeBitmapWriter* writer) { - DCHECK_LE(batch_size, kExtractBitsSize); + ARROW_DCHECK_LE(batch_size, kExtractBitsSize); // Greater than level_info.def_level - 1 implies >= the def_level auto defined_bitmap = static_cast(