From 246735f04e7d29ce3c8a681eabda7787fd514180 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Thu, 12 Oct 2023 08:19:31 -0400 Subject: [PATCH] GH-37979: [C++] Add support for specifying custom Array opening and closing delimiters to `arrow::PrettyPrintDelimiters` (#38187) ### Rationale for this change This is a follow-up to #37981. In order to make the [`arrow::PrettyPrint`](https://github.com/apache/arrow/blob/7667b81bffcb5b361fab6d61c42ce396d98cc6e1/cpp/src/arrow/pretty_print.h#L101) functionality for `arrow::Array` more flexible, it would be useful to be able to specify a custom `Array` opening and closing delimiter other than `"["` and `"]"`. For example, the MATLAB interface wraps the Arrow C++ libraries and being able to specify a custom opening and closing delimiter for `Array` would make it possible to make the display of MATLAB `arrow.array.Array` objects more MATLAB-like. In order to support custom `Array` opening and closing delimiters, this pull request adds two new properties, `open` and `close`, to the [`arrow::PrettyPrintDelimiters`](https://github.com/apache/arrow/blob/c37059ad7b87f0cbb681f6388aca0e3f02860351/cpp/src/arrow/pretty_print.h#L38) struct. This enables use cases like the ability to display an `arrow::Array` as `<1,2,3>` instead of `[1,2,3]`, by setting `options.array_delimiters.open = "<"` and `options.array_delimiters.close = ">"`. ### What changes are included in this PR? This pull request adds two new properties to the [`arrow::PrettyPrintDelimiters`](https://github.com/apache/arrow/blob/c37059ad7b87f0cbb681f6388aca0e3f02860351/cpp/src/arrow/pretty_print.h#L38) struct: 1. `open` - the opening delimiter to use for an `Array` or `ChunkedArray` (default = `[`). 1. `close` - the closing delimiter to use for an `Array` or `ChunkedArray` (default = `]`). ### Are these changes tested? Yes. 1. Added two new tests: (1) `ArrayCustomOpenCloseDelimiter` and (2) `ChunkedArrayCustomOpenCloseDelimiter`. 2. 
All existing tests related to `arrow::PrettyPrint` pass. ### Are there any user-facing changes? Yes. This pull request adds two new public, user-facing properties, (1) `open` (of type `std::string`) and (2) `close` (also of type `std::string`) to the `PrettyPrintDelimiters` struct. This enables client code to specify custom opening and closing delimiters to use when printing an `arrow::Array` or `arrow::ChunkedArray` by changing the values of the nested `open` and `close` properties of the `array_delimiters`/`chunked_array_delimiters` properties of `PrettyPrintOptions`. ### Notes 1. This pull request was motivated by our desire to improve the display of Arrow related classes in the MATLAB interface, but it is hopefully a generic enough change that it may benefit other use cases too. ### Future Directions 1. Now that client code can easily specify custom opening, closing, and element delimiters, it may make sense to address #30951. * Closes: #37979 Authored-by: Kevin Gurney Signed-off-by: Joris Van den Bossche --- cpp/src/arrow/pretty_print.cc | 8 ++-- cpp/src/arrow/pretty_print.h | 6 +++ cpp/src/arrow/pretty_print_test.cc | 73 ++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index a4a1fa90c2878..a5410df7e9ae2 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -87,7 +87,7 @@ void PrettyPrinter::OpenArray(const Array& array) { if (!options_.skip_new_lines) { Indent(); } - (*sink_) << "["; + (*sink_) << options_.array_delimiters.open; if (array.length() > 0) { Newline(); indent_ += options_.indent_size; @@ -101,7 +101,7 @@ void PrettyPrinter::CloseArray(const Array& array) { Indent(); } } - (*sink_) << "]"; + (*sink_) << options_.array_delimiters.close; } void PrettyPrinter::Write(std::string_view data) { (*sink_) << data; } @@ -449,7 +449,7 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op for (int 
i = 0; i < indent; ++i) { (*sink) << " "; } - (*sink) << "["; + (*sink) << options.chunked_array_delimiters.open; if (!skip_new_lines) { *sink << "\n"; } @@ -488,7 +488,7 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op for (int i = 0; i < indent; ++i) { (*sink) << " "; } - (*sink) << "]"; + (*sink) << options.chunked_array_delimiters.close; return Status::OK(); } diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h index 96a214c68b8a6..ad68726716cc7 100644 --- a/cpp/src/arrow/pretty_print.h +++ b/cpp/src/arrow/pretty_print.h @@ -36,6 +36,12 @@ class Table; /// \brief Options for controlling which delimiters to use when printing /// an Array or ChunkedArray. struct ARROW_EXPORT PrettyPrintDelimiters { + /// Delimiter to use when opening an Array or ChunkedArray (e.g. "[") + std::string open = "["; + + /// Delimiter to use when closing an Array or ChunkedArray (e.g. "]") + std::string close = "]"; + /// Delimiter for separating individual elements of an Array (e.g. ","), /// or individual chunks of a ChunkedArray std::string element = ","; diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc index 45bb4ecffe054..9217e190d5b62 100644 --- a/cpp/src/arrow/pretty_print_test.cc +++ b/cpp/src/arrow/pretty_print_test.cc @@ -259,6 +259,25 @@ TEST_F(TestPrettyPrint, ArrayCustomElementDelimiter) { } } +TEST_F(TestPrettyPrint, ArrayCustomOpenCloseDelimiter) { + PrettyPrintOptions options{}; + // Use a custom opening Array delimiter of "{", rather than the default "[". + options.array_delimiters.open = "{"; + // Use a custom closing Array delimiter of "}", rather than the default "]". 
+ options.array_delimiters.close = "}"; + + std::vector is_valid = {true, true, false, true, false}; + std::vector values = {1, 2, 3, 4, 5}; + static const char* expected = R"expected({ + 1, + 2, + null, + 4, + null +})expected"; + CheckPrimitive(options, is_valid, values, expected, false); +} + TEST_F(TestPrettyPrint, Int8) { static const char* expected = R"expected([ 0, @@ -1131,6 +1150,60 @@ TEST_F(TestPrettyPrint, ChunkedArrayCustomElementDelimiter) { } } +TEST_F(TestPrettyPrint, ChunkedArrayCustomOpenCloseDelimiter) { + PrettyPrintOptions options{}; + // Use a custom opening Array delimiter of "{", rather than the default "[". + options.array_delimiters.open = "{"; + // Use a custom closing Array delimiter of "}", rather than the default "]". + options.array_delimiters.close = "}"; + // Use a custom opening ChunkedArray delimiter of "<", rather than the default "[". + options.chunked_array_delimiters.open = "<"; + // Use a custom closing ChunkedArray delimiter of ">", rather than the default "]". + options.chunked_array_delimiters.close = ">"; + + const auto chunk = ArrayFromJSON(int32(), "[1, 2, null, 4, null]"); + + // ChunkedArray with 1 chunk + { + const ChunkedArray chunked_array(chunk); + + static const char* expected = R"expected(< + { + 1, + 2, + null, + 4, + null + } +>)expected"; + CheckStream(chunked_array, options, expected); + } + + // ChunkedArray with 2 chunks + { + const ChunkedArray chunked_array({chunk, chunk}); + + static const char* expected = R"expected(< + { + 1, + 2, + null, + 4, + null + }, + { + 1, + 2, + null, + 4, + null + } +>)expected"; + + CheckStream(chunked_array, options, expected); + } +} + TEST_F(TestPrettyPrint, TablePrimitive) { std::shared_ptr int_field = field("column", int32()); auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]");