Skip to content

Commit

Permalink
Merge branch 'apache:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Ox0400 authored Nov 17, 2023
2 parents 29b468e + e543ee6 commit 34bb3ec
Show file tree
Hide file tree
Showing 101 changed files with 5,591 additions and 1,502 deletions.
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ cpp/src/generated/*.cpp linguist-generated=true
cpp/src/generated/*.h linguist-generated=true
go/**/*.s linguist-generated=true
go/arrow/unionmode_string.go linguist-generated=true
go/arrow/internal/flatbuf/*.go linguist-generated=true
go/**/*.pb.go linguist-generated=true
go/parquet/internal/gen-go/parquet/*.go linguist-generated=true
r/R/RcppExports.R linguist-generated=true
r/R/arrowExports.R linguist-generated=true
r/src/RcppExports.cpp linguist-generated=true
Expand Down
3 changes: 0 additions & 3 deletions ci/scripts/util_free_space.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ du -hsc /usr/local/*
echo "::endgroup::"
# ~1GB
sudo rm -rf \
/usr/local/aws-cli \
/usr/local/aws-sam-cil \
/usr/local/julia* || :
echo "::group::/usr/local/bin/*"
Expand All @@ -34,8 +33,6 @@ echo "::endgroup::"
# ~1GB (From 1.2GB to 214MB)
sudo rm -rf \
/usr/local/bin/aliyun \
/usr/local/bin/aws \
/usr/local/bin/aws_completer \
/usr/local/bin/azcopy \
/usr/local/bin/bicep \
/usr/local/bin/cmake-gui \
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ set(ARROW_SRCS
util/debug.cc
util/decimal.cc
util/delimiting.cc
util/float16.cc
util/formatting.cc
util/future.cc
util/hashing.cc
Expand Down
13 changes: 7 additions & 6 deletions cpp/src/arrow/chunked_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::MakeEmpty(
return std::make_shared<ChunkedArray>(std::move(new_chunks));
}

bool ChunkedArray::Equals(const ChunkedArray& other) const {
bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) const {
if (length_ != other.length()) {
return false;
}
Expand All @@ -102,9 +102,9 @@ bool ChunkedArray::Equals(const ChunkedArray& other) const {
// the underlying data independently of the chunk size.
return internal::ApplyBinaryChunked(
*this, other,
[](const Array& left_piece, const Array& right_piece,
int64_t ARROW_ARG_UNUSED(position)) {
if (!left_piece.Equals(right_piece)) {
[&](const Array& left_piece, const Array& right_piece,
int64_t ARROW_ARG_UNUSED(position)) {
if (!left_piece.Equals(right_piece, opts)) {
return Status::Invalid("Unequal piece");
}
return Status::OK();
Expand All @@ -129,14 +129,15 @@ bool mayHaveNaN(const arrow::DataType& type) {

} // namespace

bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other,
const EqualOptions& opts) const {
if (!other) {
return false;
}
if (this == other.get() && !mayHaveNaN(*type_)) {
return true;
}
return Equals(*other.get());
return Equals(*other.get(), opts);
}

bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/chunked_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,11 @@ class ARROW_EXPORT ChunkedArray {
///
/// Two chunked arrays can be equal only if they have equal datatypes.
/// However, they may be equal even if they have different chunkings.
bool Equals(const ChunkedArray& other) const;
bool Equals(const ChunkedArray& other,
const EqualOptions& opts = EqualOptions::Defaults()) const;
/// \brief Determine if two chunked arrays are equal.
bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
bool Equals(const std::shared_ptr<ChunkedArray>& other,
const EqualOptions& opts = EqualOptions::Defaults()) const;
/// \brief Determine if two chunked arrays approximately equal
bool ApproxEquals(const ChunkedArray& other,
const EqualOptions& = EqualOptions::Defaults()) const;
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/testing/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ arrow_install_all_headers("arrow/testing")

if(ARROW_BUILD_TESTS)
add_arrow_test(random_test)
add_arrow_test(gtest_util_test)
endif()
55 changes: 31 additions & 24 deletions cpp/src/arrow/testing/gtest_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,42 +145,46 @@ void AssertScalarsApproxEqual(const Scalar& expected, const Scalar& actual, bool
}

void AssertBatchesEqual(const RecordBatch& expected, const RecordBatch& actual,
bool check_metadata) {
bool check_metadata, const EqualOptions& options) {
AssertTsSame(expected, actual,
[&](const RecordBatch& expected, const RecordBatch& actual) {
return expected.Equals(actual, check_metadata);
return expected.Equals(actual, check_metadata, options);
});
}

void AssertBatchesApproxEqual(const RecordBatch& expected, const RecordBatch& actual) {
void AssertBatchesApproxEqual(const RecordBatch& expected, const RecordBatch& actual,
const EqualOptions& options) {
AssertTsSame(expected, actual,
[&](const RecordBatch& expected, const RecordBatch& actual) {
return expected.ApproxEquals(actual);
return expected.ApproxEquals(actual, options);
});
}

void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& actual) {
void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& actual,
const EqualOptions& options) {
ASSERT_EQ(expected.num_chunks(), actual.num_chunks()) << "# chunks unequal";
if (!actual.Equals(expected)) {
if (!actual.Equals(expected, options)) {
std::stringstream diff;
for (int i = 0; i < actual.num_chunks(); ++i) {
auto c1 = actual.chunk(i);
auto c2 = expected.chunk(i);
diff << "# chunk " << i << std::endl;
ARROW_IGNORE_EXPR(c1->Equals(c2, EqualOptions().diff_sink(&diff)));
ARROW_IGNORE_EXPR(c1->Equals(c2, options.diff_sink(&diff)));
}
FAIL() << diff.str();
}
}

void AssertChunkedEqual(const ChunkedArray& actual, const ArrayVector& expected) {
AssertChunkedEqual(ChunkedArray(expected, actual.type()), actual);
void AssertChunkedEqual(const ChunkedArray& actual, const ArrayVector& expected,
const EqualOptions& options) {
AssertChunkedEqual(ChunkedArray(expected, actual.type()), actual, options);
}

void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& actual) {
void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& actual,
const EqualOptions& options) {
// XXX: AssertChunkedEqual in gtest_util.h does not permit the chunk layouts
// to be different
if (!actual.Equals(expected)) {
if (!actual.Equals(expected, options)) {
std::stringstream pp_expected;
std::stringstream pp_actual;
::arrow::PrettyPrintOptions options(/*indent=*/2);
Expand Down Expand Up @@ -321,21 +325,23 @@ ASSERT_EQUAL_IMPL(Field, Field, "fields")
ASSERT_EQUAL_IMPL(Schema, Schema, "schemas")
#undef ASSERT_EQUAL_IMPL

void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose) {
void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose,
const EqualOptions& options) {
ASSERT_EQ(expected.kind(), actual.kind())
<< "expected:" << expected.ToString() << " got:" << actual.ToString();

switch (expected.kind()) {
case Datum::SCALAR:
AssertScalarsEqual(*expected.scalar(), *actual.scalar(), verbose);
AssertScalarsEqual(*expected.scalar(), *actual.scalar(), verbose, options);
break;
case Datum::ARRAY: {
auto expected_array = expected.make_array();
auto actual_array = actual.make_array();
AssertArraysEqual(*expected_array, *actual_array, verbose);
AssertArraysEqual(*expected_array, *actual_array, verbose, options);
} break;
case Datum::CHUNKED_ARRAY:
AssertChunkedEquivalent(*expected.chunked_array(), *actual.chunked_array());
AssertChunkedEquivalent(*expected.chunked_array(), *actual.chunked_array(),
options);
break;
default:
// TODO: Implement better print
Expand Down Expand Up @@ -479,21 +485,21 @@ Result<std::optional<std::string>> PrintArrayDiff(const ChunkedArray& expected,
}

void AssertTablesEqual(const Table& expected, const Table& actual, bool same_chunk_layout,
bool combine_chunks) {
bool combine_chunks, const EqualOptions& options) {
ASSERT_EQ(expected.num_columns(), actual.num_columns());

if (combine_chunks) {
auto pool = default_memory_pool();
ASSERT_OK_AND_ASSIGN(auto new_expected, expected.CombineChunks(pool));
ASSERT_OK_AND_ASSIGN(auto new_actual, actual.CombineChunks(pool));

AssertTablesEqual(*new_expected, *new_actual, false, false);
AssertTablesEqual(*new_expected, *new_actual, false, false, options);
return;
}

if (same_chunk_layout) {
for (int i = 0; i < actual.num_columns(); ++i) {
AssertChunkedEqual(*expected.column(i), *actual.column(i));
AssertChunkedEqual(*expected.column(i), *actual.column(i), options);
}
} else {
std::stringstream ss;
Expand Down Expand Up @@ -533,17 +539,18 @@ void CompareBatchWith(const RecordBatch& left, const RecordBatch& right,
}

void CompareBatch(const RecordBatch& left, const RecordBatch& right,
bool compare_metadata) {
bool compare_metadata, const EqualOptions& options) {
return CompareBatchWith(
left, right, compare_metadata,
[](const Array& left, const Array& right) { return left.Equals(right); });
[&](const Array& left, const Array& right) { return left.Equals(right, options); });
}

void ApproxCompareBatch(const RecordBatch& left, const RecordBatch& right,
bool compare_metadata) {
return CompareBatchWith(
left, right, compare_metadata,
[](const Array& left, const Array& right) { return left.ApproxEquals(right); });
bool compare_metadata, const EqualOptions& options) {
return CompareBatchWith(left, right, compare_metadata,
[&](const Array& left, const Array& right) {
return left.ApproxEquals(right, options);
});
}

std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
Expand Down
48 changes: 27 additions & 21 deletions cpp/src/arrow/testing/gtest_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,18 +221,22 @@ ARROW_TESTING_EXPORT void AssertScalarsEqual(
ARROW_TESTING_EXPORT void AssertScalarsApproxEqual(
const Scalar& expected, const Scalar& actual, bool verbose = false,
const EqualOptions& options = TestingEqualOptions());
ARROW_TESTING_EXPORT void AssertBatchesEqual(const RecordBatch& expected,
const RecordBatch& actual,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertBatchesApproxEqual(const RecordBatch& expected,
const RecordBatch& actual);
ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& expected,
const ChunkedArray& actual);
ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
const ArrayVector& expected);
ARROW_TESTING_EXPORT void AssertBatchesEqual(
const RecordBatch& expected, const RecordBatch& actual, bool check_metadata = false,
const EqualOptions& options = TestingEqualOptions());
ARROW_TESTING_EXPORT void AssertBatchesApproxEqual(
const RecordBatch& expected, const RecordBatch& actual,
const EqualOptions& options = TestingEqualOptions());
ARROW_TESTING_EXPORT void AssertChunkedEqual(
const ChunkedArray& expected, const ChunkedArray& actual,
const EqualOptions& options = TestingEqualOptions());
ARROW_TESTING_EXPORT void AssertChunkedEqual(
const ChunkedArray& actual, const ArrayVector& expected,
const EqualOptions& options = TestingEqualOptions());
// Like ChunkedEqual, but permits different chunk layout
ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected,
const ChunkedArray& actual);
ARROW_TESTING_EXPORT void AssertChunkedEquivalent(
const ChunkedArray& expected, const ChunkedArray& actual,
const EqualOptions& options = TestingEqualOptions());
ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent(
const ChunkedArray& expected, const ChunkedArray& actual,
const EqualOptions& options = TestingEqualOptions());
Expand Down Expand Up @@ -277,12 +281,13 @@ ARROW_TESTING_EXPORT void AssertSchemaNotEqual(const std::shared_ptr<Schema>& lh
ARROW_TESTING_EXPORT Result<std::optional<std::string>> PrintArrayDiff(
const ChunkedArray& expected, const ChunkedArray& actual);

ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
bool same_chunk_layout = true,
bool flatten = false);
ARROW_TESTING_EXPORT void AssertTablesEqual(
const Table& expected, const Table& actual, bool same_chunk_layout = true,
bool flatten = false, const EqualOptions& options = TestingEqualOptions());

ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const Datum& actual,
bool verbose = false);
ARROW_TESTING_EXPORT void AssertDatumsEqual(
const Datum& expected, const Datum& actual, bool verbose = false,
const EqualOptions& options = TestingEqualOptions());
ARROW_TESTING_EXPORT void AssertDatumsApproxEqual(
const Datum& expected, const Datum& actual, bool verbose = false,
const EqualOptions& options = TestingEqualOptions());
Expand All @@ -296,12 +301,13 @@ void AssertNumericDataEqual(const C_TYPE* raw_data,
}
}

ARROW_TESTING_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right,
bool compare_metadata = true);
ARROW_TESTING_EXPORT void CompareBatch(
const RecordBatch& left, const RecordBatch& right, bool compare_metadata = true,
const EqualOptions& options = TestingEqualOptions());

ARROW_TESTING_EXPORT void ApproxCompareBatch(const RecordBatch& left,
const RecordBatch& right,
bool compare_metadata = true);
ARROW_TESTING_EXPORT void ApproxCompareBatch(
const RecordBatch& left, const RecordBatch& right, bool compare_metadata = true,
const EqualOptions& options = TestingEqualOptions());

// Check if the padding of the buffers of the array is zero.
// Also cause valgrind warnings if the padding bytes are uninitialized.
Expand Down
Loading

0 comments on commit 34bb3ec

Please sign in to comment.