From 85fc3ebe93ee731b55ff22021f4d55a7768aeb6f Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 12 Sep 2024 13:39:42 -0300 Subject: [PATCH] GH-42247: [C++] Support casting to and from utf8_view/binary_view (#43302) ### Rationale for this change We need casts between string (binary) and string-view (binary-view) types since they are semantically equivalent. ### What changes are included in this PR? - Add `is_binary_view_like()` type predicate - Add `BinaryViewTypes()` list including `STRING_VIEW/BINARY_VIEW` - New cast kernels ### Are these changes tested? Yes, but test coverage might be improved. ### Are there any user-facing changes? More casts are available. * GitHub Issue: #42247 Lead-authored-by: Felipe Oliveira Carvalho Co-authored-by: mwish Signed-off-by: Antoine Pitrou --- .../arrow/compute/kernels/codegen_internal.h | 19 +- .../compute/kernels/scalar_cast_boolean.cc | 6 + .../compute/kernels/scalar_cast_internal.cc | 7 +- .../compute/kernels/scalar_cast_numeric.cc | 24 +- .../compute/kernels/scalar_cast_string.cc | 289 +++++++++++++++++- .../arrow/compute/kernels/scalar_cast_test.cc | 146 ++++++--- cpp/src/arrow/type.cc | 12 +- cpp/src/arrow/type.h | 3 + cpp/src/arrow/type_test.cc | 2 + cpp/src/arrow/type_traits.h | 25 ++ cpp/src/arrow/util/binary_view_util.h | 13 + cpp/src/arrow/visit_data_inline.h | 3 +- 12 files changed, 473 insertions(+), 76 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 9e46a21887f8c..7f9be92f3a14b 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -133,7 +133,8 @@ struct GetViewType> { template struct GetViewType::value || - is_fixed_size_binary_type::value>> { + is_fixed_size_binary_type::value || + is_binary_view_like_type::value>> { using T = std::string_view; using PhysicalType = T; @@ -1265,6 +1266,22 @@ ArrayKernelExec GenerateVarBinary(detail::GetTypeId get_id) { } } +// Generate a kernel given a templated functor for binary-view types. Generates a +// single kernel for binary/string-view. +// +// See "Numeric" above for description of the generator functor +template