Skip to content

Commit

Permalink
GH-43209: [C++] Add lint for DCHECK in public headers (#43248)
Browse files Browse the repository at this point in the history
### Rationale for this change

I raised my question in #43209 about which I have always been curious. The top answer makes a good sense and after some searching in the code base, I found more evidence like:
https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/public_api_test.cc#L67-L71

So I'm making the following changes to `DCHECK` macro family.

### What changes are included in this PR?

1. Add lint rule for `DCHECK` usage in public headers;
2. Cleanup exisiting `DCHECK` usages in public (probably non-public, but at least not named with `internal`) headers;
3. Add `ifdef` protection for `DCHECK` definition like we did for `ASSIGN_OR_RAISE` (https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/result_internal.h#L20-L22) and `RETURN_NOT_OK` (https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/status.h#L80-L82), to not mess up user code that directly includes `arrow/util/logging.h`.
4. Add comments as guideline.

### Are these changes tested?

No test needed.

### Are there any user-facing changes?

Probably not?

* GitHub Issue: #43209

Authored-by: Ruoxi Sun <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
zanmato1984 authored Jul 16, 2024
1 parent 36fe1da commit 12f68fc
Show file tree
Hide file tree
Showing 23 changed files with 54 additions and 38 deletions.
11 changes: 5 additions & 6 deletions cpp/build-support/lint_cpp_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
_NULLPTR_REGEX = re.compile(r'.*\bnullptr\b.*')
_RETURN_NOT_OK_REGEX = re.compile(r'.*\sRETURN_NOT_OK.*')
_ASSIGN_OR_RAISE_REGEX = re.compile(r'.*\sASSIGN_OR_RAISE.*')
_DCHECK_REGEX = re.compile(r'.*\sDCHECK.*')


def _paths(paths):
Expand All @@ -54,14 +55,12 @@ def lint_file(path):
(lambda x: re.match(_RETURN_NOT_OK_REGEX, x),
'Use ARROW_RETURN_NOT_OK in header files', _paths('''\
arrow/status.h
test
arrow/util/hash.h
arrow/python/util''')),
(lambda x: re.match(_ASSIGN_OR_RAISE_REGEX, x),
'Use ARROW_ASSIGN_OR_RAISE in header files', _paths('''\
arrow/result_internal.h
test
'''))
'Use ARROW_ASSIGN_OR_RAISE in header files', []),
(lambda x: re.match(_DCHECK_REGEX, x),
'Use ARROW_DCHECK in header files', _paths('''\
arrow/util/logging.h'''))

]

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/asof_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

#include "arrow/acero/exec_plan.h"
#include "arrow/acero/options.h"
#include "arrow/acero/unmaterialized_table.h"
#include "arrow/acero/unmaterialized_table_internal.h"
#ifndef NDEBUG
#include "arrow/acero/options_internal.h"
#endif
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/sorted_merge_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include "arrow/acero/options.h"
#include "arrow/acero/query_context.h"
#include "arrow/acero/time_series_util.h"
#include "arrow/acero/unmaterialized_table.h"
#include "arrow/acero/unmaterialized_table_internal.h"
#include "arrow/acero/util.h"
#include "arrow/array/builder_base.h"
#include "arrow/result.h"
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class ARROW_ACERO_EXPORT AtomicCounter {

// return true if the counter is complete
bool Increment() {
DCHECK_NE(count_.load(), total_.load());
ARROW_DCHECK_NE(count_.load(), total_.load());
int count = count_.fetch_add(1) + 1;
if (count != total_.load()) return false;
return DoneOnce();
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion cpp/src/arrow/util/bit_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#include "arrow/testing/util.h"
#include "arrow/type_fwd.h"
#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_stream_utils.h"
#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bitmap.h"
#include "arrow/util/bitmap_generate.h"
#include "arrow/util/bitmap_ops.h"
Expand Down
17 changes: 17 additions & 0 deletions cpp/src/arrow/util/logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,31 @@ enum class ArrowLogLevel : int {

#endif // NDEBUG

// These are internal-use macros and should not be used in public headers.
#ifndef DCHECK
#define DCHECK ARROW_DCHECK
#endif
#ifndef DCHECK_OK
#define DCHECK_OK ARROW_DCHECK_OK
#endif
#ifndef DCHECK_EQ
#define DCHECK_EQ ARROW_DCHECK_EQ
#endif
#ifndef DCHECK_NE
#define DCHECK_NE ARROW_DCHECK_NE
#endif
#ifndef DCHECK_LE
#define DCHECK_LE ARROW_DCHECK_LE
#endif
#ifndef DCHECK_LT
#define DCHECK_LT ARROW_DCHECK_LT
#endif
#ifndef DCHECK_GE
#define DCHECK_GE ARROW_DCHECK_GE
#endif
#ifndef DCHECK_GT
#define DCHECK_GT ARROW_DCHECK_GT
#endif

// This code is adapted from
// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_stream_utils.h"
#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/util/rle_encoding_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
#include "arrow/buffer.h"
#include "arrow/testing/random.h"
#include "arrow/type.h"
#include "arrow/util/bit_stream_utils.h"
#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/io_util.h"
#include "arrow/util/rle_encoding.h"
#include "arrow/util/rle_encoding_internal.h"

namespace arrow {
namespace util {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/util/tdigest.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class ARROW_EXPORT TDigest {
// this function is intensively called and performance critical
// call it only if you are sure no NAN exists in input data
void Add(double value) {
DCHECK(!std::isnan(value)) << "cannot add NAN";
ARROW_DCHECK(!std::isnan(value)) << "cannot add NAN";
if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) {
MergeInput();
}
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/util/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ namespace internal {

template <typename T>
std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
DCHECK(!values.empty());
DCHECK_LT(index, values.size());
ARROW_DCHECK(!values.empty());
ARROW_DCHECK_LT(index, values.size());
std::vector<T> out;
out.reserve(values.size() - 1);
for (size_t i = 0; i < index; ++i) {
Expand All @@ -47,7 +47,7 @@ std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
template <typename T>
std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
T new_element) {
DCHECK_LE(index, values.size());
ARROW_DCHECK_LE(index, values.size());
std::vector<T> out;
out.reserve(values.size() + 1);
for (size_t i = 0; i < index; ++i) {
Expand All @@ -63,7 +63,7 @@ std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
template <typename T>
std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
T new_element) {
DCHECK_LE(index, values.size());
ARROW_DCHECK_LE(index, values.size());
std::vector<T> out;
out.reserve(values.size());
for (size_t i = 0; i < index; ++i) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/dex_visitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class GANDIVA_EXPORT DexVisitor {

/// Default implementation with only DCHECK().
#define VISIT_DCHECK(DEX_CLASS) \
void Visit(const DEX_CLASS& dex) override { DCHECK(0); }
void Visit(const DEX_CLASS& dex) override { ARROW_DCHECK(0); }

class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor {
VISIT_DCHECK(VectorReadValidityDex)
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class GANDIVA_EXPORT Engine {
/// Add the function to the list of IR functions that need to be compiled.
/// Compiling only the functions that are used by the module saves time.
void AddFunctionToCompile(const std::string& fname) {
DCHECK(!module_finalized_);
ARROW_DCHECK(!module_finalized_);
functions_to_compile_.push_back(fname);
}

Expand Down
12 changes: 6 additions & 6 deletions cpp/src/gandiva/eval_batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,22 +53,22 @@ class EvalBatch {
int GetNumBuffers() const { return num_buffers_; }

const uint8_t* GetBuffer(int idx) const {
DCHECK(idx <= num_buffers_);
ARROW_DCHECK(idx <= num_buffers_);
return (buffers_array_.get())[idx];
}

uint8_t* GetBuffer(int idx) {
DCHECK(idx <= num_buffers_);
ARROW_DCHECK(idx <= num_buffers_);
return (buffers_array_.get())[idx];
}

int64_t GetBufferOffset(int idx) const {
DCHECK(idx <= num_buffers_);
ARROW_DCHECK(idx <= num_buffers_);
return (buffer_offsets_array_.get())[idx];
}

void SetBuffer(int idx, uint8_t* buffer, int64_t offset) {
DCHECK(idx <= num_buffers_);
ARROW_DCHECK(idx <= num_buffers_);
(buffers_array_.get())[idx] = buffer;
(buffer_offsets_array_.get())[idx] = offset;
}
Expand All @@ -80,11 +80,11 @@ class EvalBatch {
}

const uint8_t* GetLocalBitMap(int idx) const {
DCHECK(idx <= GetNumLocalBitMaps());
ARROW_DCHECK(idx <= GetNumLocalBitMaps());
return local_bitmaps_holder_->GetLocalBitMap(idx);
}
uint8_t* GetLocalBitMap(int idx) {
DCHECK(idx <= GetNumLocalBitMaps());
ARROW_DCHECK(idx <= GetNumLocalBitMaps());
return local_bitmaps_holder_->GetLocalBitMap(idx);
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/llvm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class GANDIVA_EXPORT LLVMTypes {
} else if (type->isFloatingPointTy()) {
return llvm::ConstantFP::get(type, 0);
} else {
DCHECK(type->isPointerTy());
ARROW_DCHECK(type->isPointerTy());
return llvm::ConstantPointerNull::getNullValue(type);
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/local_bitmaps_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class LocalBitMapsHolder {
uint8_t** GetLocalBitMapArray() const { return local_bitmaps_array_.get(); }

uint8_t* GetLocalBitMap(int idx) const {
DCHECK(idx <= GetNumLocalBitMaps());
ARROW_DCHECK(idx <= GetNumLocalBitMaps());
return local_bitmaps_array_.get()[idx];
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/selection_vector_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class SelectionVectorImpl : public SelectionVector {
int64_t GetNumSlots() const override { return num_slots_; }

void SetNumSlots(int64_t num_slots) override {
DCHECK_LE(num_slots, max_slots_);
ARROW_DCHECK_LE(num_slots, max_slots_);
num_slots_ = num_slots;
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/bloom_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
/// kMaximumBloomFilterBytes, and the return value is always a power of 2
static uint32_t OptimalNumOfBytes(uint32_t ndv, double fpp) {
uint32_t optimal_num_of_bits = OptimalNumOfBits(ndv, fpp);
DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits));
ARROW_DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits));
return optimal_num_of_bits >> 3;
}

Expand All @@ -233,7 +233,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
/// @return it always return a value between kMinimumBloomFilterBytes * 8 and
/// kMaximumBloomFilterBytes * 8, and the return value is always a power of 16
static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) {
DCHECK(fpp > 0.0 && fpp < 1.0);
ARROW_DCHECK(fpp > 0.0 && fpp < 1.0);
const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8));
uint32_t num_bits;

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@
#include "arrow/array/builder_primitive.h"
#include "arrow/chunked_array.h"
#include "arrow/type.h"
#include "arrow/util/bit_stream_utils.h"
#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/compression.h"
#include "arrow/util/crc32.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/logging.h"
#include "arrow/util/rle_encoding.h"
#include "arrow/util/rle_encoding_internal.h"
#include "arrow/util/unreachable.h"
#include "parquet/column_page.h"
#include "parquet/encoding.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/column_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit_stream_utils.h"
#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/checked_cast.h"
Expand All @@ -41,7 +41,7 @@
#include "arrow/util/endian.h"
#include "arrow/util/float16.h"
#include "arrow/util/logging.h"
#include "arrow/util/rle_encoding.h"
#include "arrow/util/rle_encoding_internal.h"
#include "arrow/util/type_traits.h"
#include "arrow/visit_array_inline.h"
#include "parquet/column_page.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "arrow/type_traits.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_stream_utils.h"
#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_writer.h"
Expand All @@ -42,7 +42,7 @@
#include "arrow/util/hashing.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/logging.h"
#include "arrow/util/rle_encoding.h"
#include "arrow/util/rle_encoding_internal.h"
#include "arrow/util/ubsan.h"
#include "arrow/visit_data_inline.h"
#include "parquet/exception.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/level_conversion_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ template <bool has_repeated_parent>
int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_size,
int64_t upper_bound_remaining, LevelInfo level_info,
::arrow::internal::FirstTimeBitmapWriter* writer) {
DCHECK_LE(batch_size, kExtractBitsSize);
ARROW_DCHECK_LE(batch_size, kExtractBitsSize);

// Greater than level_info.def_level - 1 implies >= the def_level
auto defined_bitmap = static_cast<extract_bitmap_t>(
Expand Down

0 comments on commit 12f68fc

Please sign in to comment.