diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index d5f419f8a7dd8..dbd24796db52b 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -59,7 +59,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.12' - name: Install pygit2 binary wheel run: pip install pygit2 --only-binary pygit2 - name: Install Archery, Crossbow- and Test Dependencies diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index dbcbbff54953c..038a468a81276 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -43,7 +43,7 @@ jobs: - name: Set up Python uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: 3.8 + python-version: 3.12 - name: Install Archery and Crossbow dependencies run: pip install -e arrow/dev/archery[bot] - name: Handle GitHub comment event diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 4892767324335..77efda58cb3d2 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -43,7 +43,7 @@ jobs: - name: Setup Python uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -90,7 +90,7 @@ jobs: - name: Install Python uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: '3.8' + python-version: '3.12' - name: Install Ruby uses: ruby/setup-ruby@250fcd6a742febb1123a77a841497ccaa8b9e939 # v1.152.0 with: diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e394347e95261..82b43ee2363b5 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -53,7 +53,7 @@ jobs: - name: Setup Python uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 5303531f34350..306fc5135073d 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -59,7 +59,7 @@ jobs: - name: Setup Python uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml index c19576d2f659e..c535dc4a07de3 100644 --- a/.github/workflows/java_nightly.yml +++ b/.github/workflows/java_nightly.yml @@ -61,7 +61,7 @@ jobs: uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: cache: 'pip' - python-version: 3.8 + python-version: 3.12 - name: Install Archery shell: bash run: pip install -e arrow/dev/archery[all] diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml index 31ab32800705c..6af7dbe7680f5 100644 --- a/.github/workflows/pr_bot.yml +++ b/.github/workflows/pr_bot.yml @@ -84,7 +84,7 @@ jobs: - name: Set up Python uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: 3.8 + python-version: 3.12 - name: Install Archery and Crossbow dependencies run: pip install -e arrow/dev/archery[bot] - name: Handle PR workflow event diff --git 
a/.github/workflows/python.yml b/.github/workflows/python.yml index 6e3797b29c21e..25d918bcc25aa 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -132,6 +132,7 @@ jobs: timeout-minutes: 60 env: ARROW_HOME: /usr/local + ARROW_AZURE: ON ARROW_DATASET: ON ARROW_FLIGHT: ON ARROW_GANDIVA: ON diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index a57a8cddea3c0..6629b5c8a5673 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -60,10 +60,10 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: cache: 'pip' - python-version: 3.8 + python-version: 3.12 - name: Install Archery shell: bash run: pip install -e arrow/dev/archery[all] @@ -86,7 +86,7 @@ jobs: exit 1 fi - name: Cache Repo - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: path: repo key: r-nightly-${{ github.run_id }} diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index ef00f7cf4751c..b8c792008a958 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -16,6 +16,11 @@ # under the License. aws-sdk-cpp=1.11.68 +azure-core-cpp>=1.10.3 +azure-identity-cpp>=1.6.0 +azure-storage-blobs-cpp>=12.10.0 +azure-storage-common-cpp>=12.5.0 +azure-storage-files-datalake-cpp>=12.9.0 benchmark>=1.6.0 boost-cpp>=1.68.0 brotli @@ -34,6 +39,7 @@ libutf8proc lz4-c make ninja +nodejs orc pkg-config python diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 7a54dcc86f8fa..dff1f2224809a 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,6 +42,13 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all +# Ensure npm, node and azurite are on the PATH. npm and node are required to install azurite, which will then need to +# be on the PATH for the tests to run. +ENV PATH=/opt/conda/envs/arrow/bin:$PATH + +COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + # We want to install the GCS testbench using the same Python binary that the Conda code will use. COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_gcs_testbench.sh default @@ -50,6 +57,7 @@ COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin ENV ARROW_ACERO=ON \ + ARROW_AZURE=ON \ ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ ARROW_DEPENDENCY_SOURCE=CONDA \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile new file mode 100644 index 0000000000000..d56895a792f7c --- /dev/null +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -0,0 +1,204 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base=amd64/ubuntu:24.04 +FROM ${base} + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. +ARG clang_tools +ARG llvm +RUN latest_system_llvm=14 && \ + if [ ${llvm} -gt ${latest_system_llvm} -o \ + ${clang_tools} -gt ${latest_system_llvm} ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + lsb-release \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + code_name=$(lsb_release --codename --short) && \ + if [ ${llvm} -gt 10 ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list; \ + fi && \ + if [ ${clang_tools} -ne ${llvm} -a \ + ${clang_tools} -gt ${latest_system_llvm} ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${clang_tools} main" > \ + /etc/apt/sources.list.d/clang-tools.list; \ + fi; \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${clang_tools} \ + clang-${llvm} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs C++ toolchain and dependencies +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + curl \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgmock-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + libidn2-dev \ + libkrb5-dev \ + libldap-dev \ + liblz4-dev \ + libnghttp2-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libpsl-dev \ + libre2-dev \ + librtmp-dev \ + libsnappy-dev \ + libsqlite3-dev \ + libssh-dev \ + libssh2-1-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libxml2-dev \ + libzstd-dev \ + make \ + ninja-build \ + nlohmann-json3-dev \ + npm \ + pkg-config \ + protobuf-compiler \ + protobuf-compiler-grpc \ + python3-dev \ + python3-pip \ + python3-venv \ + rapidjson-dev \ + rsync \ + tzdata \ + tzdata-legacy \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +ARG gcc_version="" +RUN if [ "${gcc_version}" = "" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + g++ \ + gcc; \ + else \ + if [ "${gcc_version}" -gt "12" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends software-properties-common && \ + add-apt-repository ppa:ubuntu-toolchain-r/volatile; \ + fi; \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + g++-${gcc_version} \ + gcc-${gcc_version} && \ + update-alternatives --install /usr/bin/gcc gcc 
/usr/bin/gcc-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \ + update-alternatives --install \ + /usr/bin/$(uname --machine)-linux-gnu-gcc \ + $(uname --machine)-linux-gnu-gcc \ + /usr/bin/$(uname --machine)-linux-gnu-gcc-${gcc_version} 100 && \ + update-alternatives --install \ + /usr/bin/$(uname --machine)-linux-gnu-g++ \ + $(uname --machine)-linux-gnu-g++ \ + /usr/bin/$(uname --machine)-linux-gnu-g++-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \ + update-alternatives --set cc /usr/bin/gcc && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 && \ + update-alternatives --set c++ /usr/bin/g++; \ + fi + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh latest /usr/local + +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh default + +COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + +COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin + +# Prioritize system packages and local installation +ENV ARROW_ACERO=ON \ + ARROW_AZURE=ON \ + ARROW_BUILD_STATIC=ON \ + ARROW_BUILD_TESTS=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=ON \ + ARROW_FLIGHT_SQL=ON \ + ARROW_GANDIVA=ON \ + ARROW_GCS=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_S3=ON \ + ARROW_SUBSTRAIT=ON \ + ARROW_USE_ASAN=OFF \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_UBSAN=OFF \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_OPENTELEMETRY=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-${llvm}/bin/llvm-symbolizer \ + AWSSDK_SOURCE=BUNDLED \ + Azure_SOURCE=BUNDLED \ + google_cloud_cpp_storage_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + PYTHON=python3 \ + xsimd_SOURCE=BUNDLED diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 1d685c51a9326..a23ea8eb1cd34 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -86,6 +86,7 @@ ctest \ --label-regex unittest \ --output-on-failure \ --parallel ${n_jobs} \ + --repeat until-pass:3 \ --timeout ${ARROW_CTEST_TIMEOUT:-300} \ "${ctest_options[@]}" \ "$@" diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 0aa6d20975b49..2090290c99322 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -34,6 +34,9 @@ case "$(uname -m)" in ;; esac + +# On newer Pythons, installing into the system environment will fail, so override that +export PIP_BREAK_SYSTEM_PACKAGES=1 + version=$1 if [[ "${version}" -eq "default" ]]; then version="v0.39.0" diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index d3c6f0e6ade89..1cd1000aa3903 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -28,6 +28,7 @@ export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON} +export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} export
PYARROW_WITH_S3=${ARROW_S3:-OFF} export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 20ca3300c0538..7b803518494ee 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -52,6 +52,7 @@ fi : ${PYARROW_TEST_S3:=${ARROW_S3:-ON}} export PYARROW_TEST_ACERO +export PYARROW_TEST_AZURE export PYARROW_TEST_CUDA export PYARROW_TEST_DATASET export PYARROW_TEST_FLIGHT diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7f2f7812e3cd5..b6d9ad5a5990e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -683,307 +683,35 @@ if(${INFER_FOUND}) endif() # -# Linker and Dependencies +# Link targets # -# Libraries to link statically with libarrow.so. -# -# Note that arrow::hadoop is a header only target that refers -# cpp/thirdparty/hadoop/include/. See -# cpp/cmake_modules/ThirdpartyToolchain.cmake for details. -set(ARROW_SHARED_LINK_LIBS arrow::flatbuffers arrow::hadoop) -set(ARROW_SHARED_INSTALL_INTERFACE_LIBS) -set(ARROW_STATIC_LINK_LIBS arrow::flatbuffers arrow::hadoop) -set(ARROW_STATIC_INSTALL_INTERFACE_LIBS) - -# We must use google-cloud-cpp::storage first. If -# google-cloud-cpp::storage depends on bundled Abseil, bundled Abseil -# and system Abseil may be mixed. -# -# For example, if Boost::headers is used before -# google-cloud-cpp::storage AND Boost::headers has -# -I/opt/homebrew/include AND /opt/homebrew/include/absl/ exists, -# /opt/homebrew/include/absl/**/*.h are used instead of .h provided by -# bundled Abseil. -if(ARROW_GCS) - list(APPEND ARROW_SHARED_LINK_LIBS google-cloud-cpp::storage) - list(APPEND ARROW_STATIC_LINK_LIBS google-cloud-cpp::storage) - if(google_cloud_cpp_storage_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS google-cloud-cpp::storage) - endif() -endif() - -if(ARROW_USE_BOOST) - list(APPEND ARROW_SHARED_LINK_LIBS Boost::headers) - list(APPEND ARROW_STATIC_LINK_LIBS Boost::headers) -endif() - -if(ARROW_USE_OPENSSL) - set(ARROW_OPENSSL_LIBS OpenSSL::Crypto OpenSSL::SSL) - list(APPEND ARROW_SHARED_LINK_LIBS ${ARROW_OPENSSL_LIBS}) - list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_OPENSSL_LIBS}) - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_OPENSSL_LIBS}) -endif() - -if(ARROW_WITH_BROTLI) - # Order is important for static linking - set(ARROW_BROTLI_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) - list(APPEND ARROW_SHARED_LINK_LIBS ${ARROW_BROTLI_LIBS}) - list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_BROTLI_LIBS}) - if(Brotli_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_BROTLI_LIBS}) - endif() -endif() - -if(ARROW_WITH_BZ2) - list(APPEND ARROW_STATIC_LINK_LIBS BZip2::BZip2) - if(BZip2_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS BZip2::BZip2) - endif() -endif() - -if(ARROW_WITH_LZ4) - list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4) - if(lz4_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) - endif() -endif() - -if(ARROW_WITH_SNAPPY) - list(APPEND ARROW_STATIC_LINK_LIBS ${Snappy_TARGET}) - if(Snappy_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${Snappy_TARGET}) - endif() -endif() - -if(ARROW_WITH_ZLIB) - list(APPEND ARROW_STATIC_LINK_LIBS ZLIB::ZLIB) - if(ZLIB_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ZLIB::ZLIB) - endif() -endif() - -if(ARROW_WITH_ZSTD) - list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_ZSTD_LIBZSTD}) - if(zstd_SOURCE 
STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_ZSTD_LIBZSTD}) - endif() -endif() - -if(ARROW_ORC) - list(APPEND ARROW_SHARED_LINK_LIBS orc::orc ${ARROW_PROTOBUF_LIBPROTOBUF}) - list(APPEND ARROW_STATIC_LINK_LIBS orc::orc ${ARROW_PROTOBUF_LIBPROTOBUF}) - if(ORC_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::orc) - endif() -endif() - -if(ARROW_USE_GLOG) - list(APPEND ARROW_SHARED_LINK_LIBS glog::glog) - list(APPEND ARROW_STATIC_LINK_LIBS glog::glog) - if(GLOG_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS glog::glog) - endif() - add_definitions("-DARROW_USE_GLOG") -endif() - -if(ARROW_S3) - list(APPEND ARROW_SHARED_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) - list(APPEND ARROW_STATIC_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) - if(AWSSDK_SOURCE STREQUAL "SYSTEM") - list(APPEND - ARROW_STATIC_INSTALL_INTERFACE_LIBS - aws-cpp-sdk-identity-management - aws-cpp-sdk-sts - aws-cpp-sdk-cognito-identity - aws-cpp-sdk-s3 - aws-cpp-sdk-core) - elseif(AWSSDK_SOURCE STREQUAL "BUNDLED") - if(UNIX) - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) - endif() - endif() -endif() - -if(ARROW_WITH_OPENTELEMETRY) - list(APPEND - ARROW_SHARED_LINK_LIBS - opentelemetry-cpp::trace - opentelemetry-cpp::ostream_span_exporter - opentelemetry-cpp::otlp_http_exporter) - list(APPEND - ARROW_STATIC_LINK_LIBS - opentelemetry-cpp::trace - opentelemetry-cpp::ostream_span_exporter - opentelemetry-cpp::otlp_http_exporter) - if(opentelemetry_SOURCE STREQUAL "SYSTEM") - list(APPEND - ARROW_STATIC_INSTALL_INTERFACE_LIBS - opentelemetry-cpp::trace - opentelemetry-cpp::ostream_span_exporter - opentelemetry-cpp::otlp_http_exporter) - endif() - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) -endif() - -if(ARROW_WITH_AZURE_SDK) - list(APPEND ARROW_SHARED_LINK_LIBS ${AZURE_SDK_LINK_LIBRARIES}) - list(APPEND ARROW_STATIC_LINK_LIBS ${AZURE_SDK_LINK_LIBRARIES}) -endif() - -if(ARROW_WITH_UTF8PROC) - list(APPEND ARROW_SHARED_LINK_LIBS utf8proc::utf8proc) - list(APPEND ARROW_STATIC_LINK_LIBS utf8proc::utf8proc) - if(utf8proc_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS utf8proc::utf8proc) - endif() -endif() - -if(ARROW_WITH_RE2) - list(APPEND ARROW_SHARED_LINK_LIBS re2::re2) - list(APPEND ARROW_STATIC_LINK_LIBS re2::re2) - if(re2_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS re2::re2) - endif() -endif() - -if(ARROW_WITH_RAPIDJSON) - list(APPEND ARROW_SHARED_LINK_LIBS rapidjson::rapidjson) - list(APPEND ARROW_STATIC_LINK_LIBS rapidjson::rapidjson) -endif() - -if(ARROW_USE_XSIMD) - list(APPEND ARROW_SHARED_LINK_LIBS ${ARROW_XSIMD}) - list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_XSIMD}) -endif() - -# This should be done after if(ARROW_ORC) and if(ARROW_WITH_OPENTELEMETRY) -# because they depend on Protobuf. 
-if(ARROW_WITH_PROTOBUF) - if(Protobuf_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF}) - endif() -endif() - -add_custom_target(arrow_dependencies) -add_custom_target(arrow_benchmark_dependencies) -add_custom_target(arrow_test_dependencies) - -# ARROW-4581: CMake can be finicky about invoking the ExternalProject builds -# for some of the library dependencies, so we "nuke it from orbit" by making -# the toolchain dependency explicit using these "dependencies" targets -add_dependencies(arrow_dependencies toolchain) -add_dependencies(arrow_test_dependencies toolchain-tests) - -if(ARROW_STATIC_LINK_LIBS) - add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS}) - if(ARROW_HDFS OR ARROW_ORC) - if(NOT MSVC_TOOLCHAIN) - list(APPEND ARROW_STATIC_LINK_LIBS ${CMAKE_DL_LIBS}) - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS}) - endif() - endif() -endif() - -set(ARROW_SHARED_PRIVATE_LINK_LIBS ${ARROW_STATIC_LINK_LIBS}) - -if(NOT MSVC_TOOLCHAIN) - list(APPEND ARROW_SHARED_LINK_LIBS ${CMAKE_DL_LIBS}) -endif() - -set(ARROW_TEST_LINK_TOOLCHAIN arrow::flatbuffers ${ARROW_GTEST_GMOCK} - ${ARROW_GTEST_GTEST_MAIN}) - -if(ARROW_BUILD_TESTS) - add_dependencies(arrow_test_dependencies ${ARROW_TEST_LINK_TOOLCHAIN}) -endif() - -if(ARROW_BUILD_BENCHMARKS) - # Some benchmarks use gtest - add_dependencies(arrow_benchmark_dependencies arrow_test_dependencies - toolchain-benchmarks) -endif() - -set(ARROW_TEST_STATIC_LINK_LIBS arrow_testing_static arrow_static - ${ARROW_SHARED_LINK_LIBS} ${ARROW_TEST_LINK_TOOLCHAIN}) - -set(ARROW_TEST_SHARED_LINK_LIBS arrow_testing_shared arrow_shared - ${ARROW_SHARED_LINK_LIBS} ${ARROW_TEST_LINK_TOOLCHAIN}) - -if(NOT MSVC) - list(APPEND ARROW_TEST_SHARED_LINK_LIBS ${CMAKE_DL_LIBS}) -endif() - if("${ARROW_TEST_LINKAGE}" STREQUAL "shared") if(ARROW_BUILD_TESTS AND NOT ARROW_BUILD_SHARED) message(FATAL_ERROR "If using shared linkage for unit tests, must also \ pass ARROW_BUILD_SHARED=on") endif() # Use shared linking for unit tests if it's available - set(ARROW_TEST_LINK_LIBS ${ARROW_TEST_SHARED_LINK_LIBS}) + set(ARROW_TEST_LINK_LIBS arrow_testing_shared ${ARROW_GTEST_GMOCK} + ${ARROW_GTEST_GTEST_MAIN}) set(ARROW_EXAMPLE_LINK_LIBS arrow_shared) else() if(ARROW_BUILD_TESTS AND NOT ARROW_BUILD_STATIC) message(FATAL_ERROR "If using static linkage for unit tests, must also \ pass ARROW_BUILD_STATIC=on") endif() - set(ARROW_TEST_LINK_LIBS ${ARROW_TEST_STATIC_LINK_LIBS}) + set(ARROW_TEST_LINK_LIBS arrow_testing_static ${ARROW_GTEST_GMOCK} + ${ARROW_GTEST_GTEST_MAIN}) set(ARROW_EXAMPLE_LINK_LIBS arrow_static) endif() if(ARROW_BUILD_BENCHMARKS) - # In the case that benchmark::benchmark_main is not available, - # we need to provide our own version. This only happens for older versions - # of benchmark. 
- if(NOT TARGET benchmark::benchmark_main) - add_library(arrow_benchmark_main STATIC src/arrow/util/benchmark_main.cc) - add_library(benchmark::benchmark_main ALIAS arrow_benchmark_main) - endif() - - set(ARROW_BENCHMARK_LINK_LIBS benchmark::benchmark_main benchmark::benchmark - ${ARROW_TEST_LINK_LIBS}) + set(ARROW_BENCHMARK_LINK_LIBS benchmark::benchmark_main ${ARROW_TEST_LINK_LIBS}) if(WIN32) - set(ARROW_BENCHMARK_LINK_LIBS Shlwapi.dll ${ARROW_BENCHMARK_LINK_LIBS}) + list(APPEND ARROW_BENCHMARK_LINK_LIBS Shlwapi.dll) endif() endif() -if(ARROW_JEMALLOC) - list(APPEND ARROW_SHARED_LINK_LIBS jemalloc::jemalloc) - list(APPEND ARROW_STATIC_LINK_LIBS jemalloc::jemalloc) -endif() - -if(ARROW_MIMALLOC) - add_definitions(-DARROW_MIMALLOC) - list(APPEND ARROW_SHARED_LINK_LIBS mimalloc::mimalloc) - list(APPEND ARROW_STATIC_LINK_LIBS mimalloc::mimalloc) -endif() - -# ---------------------------------------------------------------------- -# Handle platform-related libraries like -pthread - -set(ARROW_SYSTEM_LINK_LIBS) - -if(ARROW_ENABLE_THREADING) - list(APPEND ARROW_SYSTEM_LINK_LIBS Threads::Threads) -endif() -if(CMAKE_THREAD_LIBS_INIT) - string(APPEND ARROW_PC_LIBS_PRIVATE " ${CMAKE_THREAD_LIBS_INIT}") -endif() - -if(WIN32) - # Winsock - list(APPEND ARROW_SYSTEM_LINK_LIBS "ws2_32.dll") -endif() - -if(NOT WIN32 AND NOT APPLE) - # Pass -lrt on Linux only - list(APPEND ARROW_SYSTEM_LINK_LIBS rt) -endif() - -list(APPEND ARROW_SHARED_LINK_LIBS ${ARROW_SYSTEM_LINK_LIBS}) -list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_SYSTEM_LINK_LIBS}) -list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_SYSTEM_LINK_LIBS}) - # # Subdirectories # diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 083ac2fe9a862..7a45e9cca59de 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -97,7 +97,27 @@ function(arrow_create_merged_static_lib output_target) endforeach() if(APPLE) - set(BUNDLE_COMMAND "libtool" "-no_warning_for_no_symbols" "-static" "-o" + # The Apple-distributed libtool is what we want for bundling, but there is + # a GNU libtool that has a name collision (and happens to be bundled with R, too). + # We are not compatible with GNU libtool, so we need to avoid it.
+ + # check in the obvious places first to find Apple's libtool + # HINTS is used before system paths and before PATHS, so we use that + # even though hard coded paths should go in PATHS + # TODO: use a VALIDATOR when we require cmake >= 3.25 + find_program(LIBTOOL_MACOS libtool HINTS /usr/bin + /Library/Developer/CommandLineTools/usr/bin) + + # confirm that the libtool we found is not GNU libtool + execute_process(COMMAND ${LIBTOOL_MACOS} -V + OUTPUT_VARIABLE LIBTOOL_V_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT "${LIBTOOL_V_OUTPUT}" MATCHES ".*cctools-([0-9.]+).*") + message(FATAL_ERROR "libtool found appears to be the incompatible GNU libtool: ${LIBTOOL_MACOS}" + ) + endif() + + set(BUNDLE_COMMAND ${LIBTOOL_MACOS} "-no_warning_for_no_symbols" "-static" "-o" ${output_lib_path} ${all_library_paths}) elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|GNU|Intel|IntelLLVM)$") set(ar_script_path ${CMAKE_BINARY_DIR}/${ARG_NAME}.ar) diff --git a/cpp/cmake_modules/FindRapidJSONAlt.cmake b/cpp/cmake_modules/FindRapidJSONAlt.cmake index ef5acf18b8223..babb450e204a7 100644 --- a/cpp/cmake_modules/FindRapidJSONAlt.cmake +++ b/cpp/cmake_modules/FindRapidJSONAlt.cmake @@ -29,7 +29,14 @@ endif() find_package(RapidJSON ${find_package_args}) if(RapidJSON_FOUND) set(RapidJSONAlt_FOUND TRUE) - set(RAPIDJSON_INCLUDE_DIR ${RAPIDJSON_INCLUDE_DIRS}) + if(NOT TARGET RapidJSON) + add_library(RapidJSON INTERFACE IMPORTED) + if(RapidJSON_INCLUDE_DIRS) + target_include_directories(RapidJSON INTERFACE "${RapidJSON_INCLUDE_DIRS}") + else() + target_include_directories(RapidJSON INTERFACE "${RAPIDJSON_INCLUDE_DIRS}") + endif() + endif() return() endif() @@ -74,3 +81,14 @@ find_package_handle_standard_args( RapidJSONAlt REQUIRED_VARS RAPIDJSON_INCLUDE_DIR VERSION_VAR RAPIDJSON_VERSION) + +if(RapidJSONAlt_FOUND) + if(WIN32 AND "${RAPIDJSON_INCLUDE_DIR}" MATCHES "^/") + # MSYS2 + execute_process(COMMAND "cygpath" "--windows" "${RAPIDJSON_INCLUDE_DIR}" + OUTPUT_VARIABLE RAPIDJSON_INCLUDE_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE) + endif() + add_library(RapidJSON INTERFACE IMPORTED) + target_include_directories(RapidJSON INTERFACE "${RAPIDJSON_INCLUDE_DIR}") +endif() diff --git a/cpp/cmake_modules/FindGLOG.cmake b/cpp/cmake_modules/FindglogAlt.cmake similarity index 77% rename from cpp/cmake_modules/FindGLOG.cmake rename to cpp/cmake_modules/FindglogAlt.cmake index 61b7d0694efd4..eb16636add958 100644 --- a/cpp/cmake_modules/FindGLOG.cmake +++ b/cpp/cmake_modules/FindglogAlt.cmake @@ -15,9 +15,22 @@ # # Usage of this module as follows: # -# find_package(GLOG) +# find_package(glogAlt) -if(GLOG_FOUND) +if(glogAlt_FOUND) + return() +endif() + +set(find_package_args CONFIG) +if(glogAlt_FIND_VERSION) + list(APPEND find_package_args ${glogAlt_FIND_VERSION}) +endif() +if(glogAlt_FIND_QUIETLY) + list(APPEND find_package_args QUIET) +endif() +find_package(glog ${find_package_args}) +if(glog_FOUND) + set(glogAlt_FOUND TRUE) return() endif() @@ -50,11 +63,12 @@ else() PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) endif() -find_package_handle_standard_args(GLOG REQUIRED_VARS GLOG_INCLUDE_DIR GLOG_LIB) +find_package_handle_standard_args(glogAlt REQUIRED_VARS GLOG_INCLUDE_DIR GLOG_LIB) -if(GLOG_FOUND) +if(glogAlt_FOUND) add_library(glog::glog UNKNOWN IMPORTED) set_target_properties(glog::glog PROPERTIES IMPORTED_LOCATION "${GLOG_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GLOG_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${GLOG_INCLUDE_DIR}" + INTERFACE_COMPILE_DEFINITIONS "GLOG_USE_GLOG_EXPORT") endif() diff --git 
a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b16ee07756013..b6b6ac18ea8cb 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -18,11 +18,6 @@ include(ProcessorCount) processorcount(NPROC) -add_custom_target(rapidjson) -add_custom_target(toolchain) -add_custom_target(toolchain-benchmarks) -add_custom_target(toolchain-tests) - # Accumulate all bundled targets and we will splice them together later as # libarrow_bundled_dependencies.a so that third party libraries have something # usable to create statically-linked builds with some BUNDLED dependencies, @@ -56,7 +51,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES BZip2 c-ares gflags - GLOG + glog google_cloud_cpp_storage gRPC GTest @@ -116,6 +111,14 @@ if("${lz4_SOURCE}" STREQUAL "" AND NOT "${Lz4_SOURCE}" STREQUAL "") set(lz4_SOURCE ${Lz4_SOURCE}) endif() +# For backward compatibility. We use "GLOG_SOURCE" if "glog_SOURCE" +# isn't specified and "GLOG_SOURCE" is specified. +# We renamed the "GLOG" dependency to "glog" in 16.0.0 because +# upstream uses "glog", not "GLOG", as the package name. +if("${glog_SOURCE}" STREQUAL "" AND NOT "${GLOG_SOURCE}" STREQUAL "") + set(glog_SOURCE ${GLOG_SOURCE}) +endif() + # For backward compatibility. We use bundled jemalloc by default. if("${jemalloc_SOURCE}" STREQUAL "") set(jemalloc_SOURCE "BUNDLED") @@ -185,7 +188,7 @@ macro(build_dependency DEPENDENCY_NAME) build_cares() elseif("${DEPENDENCY_NAME}" STREQUAL "gflags") build_gflags() - elseif("${DEPENDENCY_NAME}" STREQUAL "GLOG") + elseif("${DEPENDENCY_NAME}" STREQUAL "glog") build_glog() elseif("${DEPENDENCY_NAME}" STREQUAL "google_cloud_cpp_storage") build_google_cloud_cpp_storage() @@ -1150,7 +1153,6 @@ macro(build_boost) if(NOT TARGET Boost::dynamic_linking) # This doesn't add BOOST_ALL_DYN_LINK because bundled Boost is a static library.
add_library(Boost::dynamic_linking INTERFACE IMPORTED) - add_dependencies(toolchain boost_ep) endif() set(BOOST_VENDORED TRUE) endmacro() @@ -1364,7 +1366,6 @@ macro(build_snappy) set_target_properties(${Snappy_TARGET} PROPERTIES IMPORTED_LOCATION "${SNAPPY_STATIC_LIB}") target_include_directories(${Snappy_TARGET} BEFORE INTERFACE "${SNAPPY_PREFIX}/include") - add_dependencies(toolchain snappy_ep) add_dependencies(${Snappy_TARGET} snappy_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS ${Snappy_TARGET}) @@ -1415,7 +1416,6 @@ macro(build_brotli) CMAKE_ARGS ${BROTLI_CMAKE_ARGS} STEP_TARGETS headers_copy) - add_dependencies(toolchain brotli_ep) file(MAKE_DIRECTORY "${BROTLI_INCLUDE_DIR}") add_library(Brotli::brotlicommon STATIC IMPORTED) @@ -1451,6 +1451,8 @@ if(ARROW_WITH_BROTLI) PC_PACKAGE_NAMES libbrotlidec libbrotlienc) + # Order is important for static linking + set(ARROW_BROTLI_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) endif() if(PARQUET_REQUIRE_ENCRYPTION AND NOT ARROW_PARQUET) @@ -1471,6 +1473,7 @@ if(PARQUET_REQUIRE_ENCRYPTION REQUIRED_VERSION ${ARROW_OPENSSL_REQUIRED_VERSION}) set(ARROW_USE_OPENSSL ON) + set(ARROW_OPENSSL_LIBS OpenSSL::Crypto OpenSSL::SSL) endif() if(ARROW_USE_OPENSSL) @@ -1522,7 +1525,6 @@ macro(build_glog) BUILD_BYPRODUCTS "${GLOG_STATIC_LIB}" CMAKE_ARGS ${GLOG_CMAKE_ARGS}) - add_dependencies(toolchain glog_ep) file(MAKE_DIRECTORY "${GLOG_INCLUDE_DIR}") add_library(glog::glog STATIC IMPORTED) @@ -1534,7 +1536,11 @@ macro(build_glog) endmacro() if(ARROW_USE_GLOG) - resolve_dependency(GLOG PC_PACKAGE_NAMES libglog) + resolve_dependency(glog + HAVE_ALT + TRUE + PC_PACKAGE_NAMES + libglog) endif() # ---------------------------------------------------------------------- @@ -1581,8 +1587,6 @@ macro(build_gflags) BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}" CMAKE_ARGS ${GFLAGS_CMAKE_ARGS}) - add_dependencies(toolchain gflags_ep) - add_thirdparty_lib(gflags::gflags_static STATIC ${GFLAGS_STATIC_LIB}) add_dependencies(gflags::gflags_static gflags_ep) set(GFLAGS_LIBRARY gflags::gflags_static) @@ -1694,7 +1698,6 @@ macro(build_thrift) if(ARROW_USE_BOOST) target_link_libraries(thrift::thrift INTERFACE Boost::headers) endif() - add_dependencies(toolchain thrift_ep) add_dependencies(thrift::thrift thrift_ep) set(Thrift_VERSION ${ARROW_THRIFT_BUILD_VERSION}) set(THRIFT_VENDORED TRUE) @@ -1791,7 +1794,6 @@ macro(build_protobuf) set_target_properties(arrow::protobuf::protoc PROPERTIES IMPORTED_LOCATION "${PROTOBUF_COMPILER}") - add_dependencies(toolchain protobuf_ep) add_dependencies(arrow::protobuf::libprotobuf protobuf_ep) add_dependencies(arrow::protobuf::protoc protobuf_ep) @@ -2133,7 +2135,6 @@ if(ARROW_MIMALLOC) target_link_libraries(mimalloc::mimalloc INTERFACE "bcrypt.lib" "psapi.lib") endif() add_dependencies(mimalloc::mimalloc mimalloc_ep) - add_dependencies(toolchain mimalloc_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc) @@ -2291,8 +2292,8 @@ macro(build_benchmark) PROPERTIES IMPORTED_LOCATION "${GBENCHMARK_MAIN_STATIC_LIB}") target_include_directories(benchmark::benchmark_main BEFORE INTERFACE "${GBENCHMARK_INCLUDE_DIR}") + target_link_libraries(benchmark::benchmark_main INTERFACE benchmark::benchmark) - add_dependencies(toolchain-benchmarks gbenchmark_ep) add_dependencies(benchmark::benchmark gbenchmark_ep) add_dependencies(benchmark::benchmark_main gbenchmark_ep) endmacro() @@ -2328,9 +2329,9 @@ macro(build_rapidjson) # The include directory must exist before it is referenced by a target. 
file(MAKE_DIRECTORY "${RAPIDJSON_INCLUDE_DIR}") - add_dependencies(toolchain rapidjson_ep) - add_dependencies(toolchain-tests rapidjson_ep) - add_dependencies(rapidjson rapidjson_ep) + add_library(RapidJSON INTERFACE IMPORTED) + target_include_directories(RapidJSON INTERFACE "${RAPIDJSON_INCLUDE_DIR}") + add_dependencies(RapidJSON rapidjson_ep) set(RAPIDJSON_VENDORED TRUE) endmacro() @@ -2344,19 +2345,6 @@ if(ARROW_WITH_RAPIDJSON) ${ARROW_RAPIDJSON_REQUIRED_VERSION} IS_RUNTIME_DEPENDENCY FALSE) - - if(RapidJSON_INCLUDE_DIR) - set(RAPIDJSON_INCLUDE_DIR "${RapidJSON_INCLUDE_DIR}") - endif() - if(WIN32 AND "${RAPIDJSON_INCLUDE_DIR}" MATCHES "^/") - # MSYS2 - execute_process(COMMAND "cygpath" "--windows" "${RAPIDJSON_INCLUDE_DIR}" - OUTPUT_VARIABLE RAPIDJSON_INCLUDE_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE) - endif() - - add_library(rapidjson::rapidjson INTERFACE IMPORTED) - target_include_directories(rapidjson::rapidjson INTERFACE "${RAPIDJSON_INCLUDE_DIR}") endif() macro(build_xsimd) @@ -2375,8 +2363,9 @@ macro(build_xsimd) # The include directory must exist before it is referenced by a target. file(MAKE_DIRECTORY "${XSIMD_INCLUDE_DIR}") - add_dependencies(toolchain xsimd_ep) - add_dependencies(toolchain-tests xsimd_ep) + add_library(arrow::xsimd INTERFACE IMPORTED) + target_include_directories(arrow::xsimd INTERFACE "${XSIMD_INCLUDE_DIR}") + add_dependencies(arrow::xsimd xsimd_ep) set(XSIMD_VENDORED TRUE) endmacro() @@ -2398,8 +2387,6 @@ if(ARROW_USE_XSIMD) "8.1.0") if(xsimd_SOURCE STREQUAL "BUNDLED") - add_library(arrow::xsimd INTERFACE IMPORTED) - target_include_directories(arrow::xsimd INTERFACE "${XSIMD_INCLUDE_DIR}") set(ARROW_XSIMD arrow::xsimd) else() message(STATUS "xsimd found. Headers: ${xsimd_INCLUDE_DIRS}") @@ -2437,7 +2424,6 @@ macro(build_zlib) set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") - add_dependencies(toolchain zlib_ep) add_dependencies(ZLIB::ZLIB zlib_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) @@ -2473,7 +2459,6 @@ macro(build_lz4) add_library(LZ4::lz4 STATIC IMPORTED) set_target_properties(LZ4::lz4 PROPERTIES IMPORTED_LOCATION "${LZ4_STATIC_LIB}") target_include_directories(LZ4::lz4 BEFORE INTERFACE "${LZ4_PREFIX}/include") - add_dependencies(toolchain lz4_ep) add_dependencies(LZ4::lz4 lz4_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS LZ4::lz4) @@ -2526,7 +2511,6 @@ macro(build_zstd) target_include_directories(zstd::libzstd_static BEFORE INTERFACE "${ZSTD_PREFIX}/include") - add_dependencies(toolchain zstd_ep) add_dependencies(zstd::libzstd_static zstd_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS zstd::libzstd_static) @@ -2583,7 +2567,6 @@ macro(build_re2) set_target_properties(re2::re2 PROPERTIES IMPORTED_LOCATION "${RE2_STATIC_LIB}") target_include_directories(re2::re2 BEFORE INTERFACE "${RE2_PREFIX}/include") - add_dependencies(toolchain re2_ep) add_dependencies(re2::re2 re2_ep) set(RE2_VENDORED TRUE) # Set values so that FindRE2 finds this too @@ -2599,7 +2582,6 @@ if(ARROW_WITH_RE2) TRUE PC_PACKAGE_NAMES re2) - add_definitions(-DARROW_WITH_RE2) endif() macro(build_bzip2) @@ -2642,7 +2624,6 @@ macro(build_bzip2) target_include_directories(BZip2::BZip2 BEFORE INTERFACE "${BZIP2_PREFIX}/include") set(BZIP2_INCLUDE_DIR "${BZIP2_PREFIX}/include") - add_dependencies(toolchain bzip2_ep) add_dependencies(BZip2::BZip2 bzip2_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS BZip2::BZip2) @@ -2695,7 +2676,6 @@ macro(build_utf8proc) 
target_include_directories(utf8proc::utf8proc BEFORE INTERFACE "${UTF8PROC_PREFIX}/include") - add_dependencies(toolchain utf8proc_ep) add_dependencies(utf8proc::utf8proc utf8proc_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS utf8proc::utf8proc) @@ -2707,7 +2687,6 @@ if(ARROW_WITH_UTF8PROC) libutf8proc REQUIRED_VERSION "2.2.0") - add_definitions(-DARROW_WITH_UTF8PROC) endif() macro(build_cares) @@ -2733,7 +2712,6 @@ macro(build_cares) file(MAKE_DIRECTORY ${CARES_INCLUDE_DIR}) - add_dependencies(toolchain cares_ep) add_library(c-ares::cares STATIC IMPORTED) set_target_properties(c-ares::cares PROPERTIES IMPORTED_LOCATION "${CARES_STATIC_LIB}") target_include_directories(c-ares::cares BEFORE INTERFACE "${CARES_INCLUDE_DIR}") @@ -3978,7 +3956,6 @@ macro(build_grpc) ${GRPC_CPP_PLUGIN}) add_dependencies(grpc_ep grpc_dependencies) - add_dependencies(toolchain grpc_ep) add_dependencies(gRPC::grpc++ grpc_ep) add_dependencies(gRPC::grpc_cpp_plugin grpc_ep) set(GRPC_VENDORED TRUE) @@ -4228,8 +4205,6 @@ macro(build_google_cloud_cpp_storage) # Work around https://gitlab.kitware.com/cmake/cmake/issues/15052 file(MAKE_DIRECTORY ${GOOGLE_CLOUD_CPP_INCLUDE_DIR}) - add_dependencies(toolchain google_cloud_cpp_ep) - add_library(google-cloud-cpp::common STATIC IMPORTED) set_target_properties(google-cloud-cpp::common PROPERTIES IMPORTED_LOCATION @@ -4450,7 +4425,6 @@ macro(build_orc) target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS}) endif() - add_dependencies(toolchain orc_ep) add_dependencies(orc::orc orc_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc) @@ -4458,6 +4432,7 @@ endmacro() if(ARROW_ORC) resolve_dependency(orc HAVE_ALT TRUE) + target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}") message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}") endif() @@ -4617,9 +4592,6 @@ macro(build_opentelemetry) DEPENDEES download DEPENDERS configure) - add_dependencies(toolchain opentelemetry_ep) - add_dependencies(toolchain-tests opentelemetry_ep) - set(OPENTELEMETRY_VENDORED 1) target_link_libraries(opentelemetry-cpp::common @@ -4661,6 +4633,9 @@ if(ARROW_WITH_OPENTELEMETRY) find_curl() set(opentelemetry-cpp_SOURCE "AUTO") resolve_dependency(opentelemetry-cpp) + set(ARROW_OPENTELEMETRY_LIBS + opentelemetry-cpp::trace opentelemetry-cpp::ostream_span_exporter + opentelemetry-cpp::otlp_http_exporter) get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::api INTERFACE_INCLUDE_DIRECTORIES) message(STATUS "Found OpenTelemetry headers: ${OPENTELEMETRY_INCLUDE_DIR}") @@ -4996,7 +4971,6 @@ macro(build_awssdk) ${AWS_CPP_SDK_S3_STATIC_LIBRARY} ${AWS_CPP_SDK_STS_STATIC_LIBRARY} DEPENDS aws_crt_cpp_ep) - add_dependencies(toolchain awssdk_ep) foreach(_AWSSDK_LIB ${_AWSSDK_LIBS}) if(${_AWSSDK_LIB} MATCHES "^aws-cpp-sdk-") add_dependencies(${_AWSSDK_LIB} awssdk_ep) @@ -5025,6 +4999,9 @@ macro(build_awssdk) PROPERTY INTERFACE_LINK_LIBRARIES ZLIB::ZLIB) add_dependencies(awssdk_ep zlib_ep) endif() + set_property(TARGET AWS::aws-c-io + APPEND + PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_DL_LIBS}) elseif(WIN32) set_property(TARGET aws-cpp-sdk-core APPEND @@ -5128,12 +5105,8 @@ endfunction() if(ARROW_WITH_AZURE_SDK) resolve_dependency(Azure REQUIRED_VERSION 1.10.2) - set(AZURE_SDK_LINK_LIBRARIES - Azure::azure-storage-files-datalake - Azure::azure-storage-common - Azure::azure-storage-blobs - Azure::azure-identity - Azure::azure-core) + set(AZURE_SDK_LINK_LIBRARIES Azure::azure-storage-files-datalake + Azure::azure-storage-blobs 
Azure::azure-identity) endif() # ---------------------------------------------------------------------- # ucx - communication framework for modern, high-bandwidth and low-latency networks @@ -5198,7 +5171,6 @@ macro(build_ucx) add_library(ucx::ucs SHARED IMPORTED) set_target_properties(ucx::ucs PROPERTIES IMPORTED_LOCATION "${UCX_SHARED_LIB_UCS}") - add_dependencies(toolchain ucx_ep) add_dependencies(ucx::ucp ucx_ep) add_dependencies(ucx::uct ucx_ep) add_dependencies(ucx::ucs ucx_ep) diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt index 4625f130565e7..a092a31733f72 100644 --- a/cpp/examples/arrow/CMakeLists.txt +++ b/cpp/examples/arrow/CMakeLists.txt @@ -17,7 +17,9 @@ add_arrow_example(row_wise_conversion_example) -add_arrow_example(rapidjson_row_converter) +if(ARROW_WITH_RAPIDJSON) + add_arrow_example(rapidjson_row_converter EXTRA_LINK_LIBS RapidJSON) +endif() if(ARROW_ACERO) if(ARROW_BUILD_SHARED) @@ -93,6 +95,17 @@ if(ARROW_FLIGHT) add_custom_target(flight_grpc_example_gen ALL DEPENDS ${FLIGHT_EXAMPLE_GENERATED_PROTO_FILES}) + set(FLIGHT_GRPC_EXAMPLE_LINK_LIBS + ${FLIGHT_EXAMPLES_LINK_LIBS} + gRPC::grpc++ + ${GRPC_REFLECTION_LINK_LIBS} + ${ARROW_PROTOBUF_LIBPROTOBUF} + ${GFLAGS_LIBRARIES}) + if(TARGET absl::log_internal_check_op) + # Protobuf generated files may use ABSL_DCHECK*() and + # absl::log_internal_check_op is needed for them. + list(APPEND FLIGHT_GRPC_EXAMPLE_LINK_LIBS absl::log_internal_check_op) + endif() add_arrow_example(flight_grpc_example DEPENDENCIES flight_grpc_example_gen @@ -102,11 +115,7 @@ if(ARROW_FLIGHT) EXTRA_INCLUDES ${CMAKE_BINARY_DIR} EXTRA_LINK_LIBS - ${FLIGHT_EXAMPLES_LINK_LIBS} - gRPC::grpc++ - ${GRPC_REFLECTION_LINK_LIBS} - ${ARROW_PROTOBUF_LIBPROTOBUF} - ${GFLAGS_LIBRARIES} + ${FLIGHT_GRPC_EXAMPLE_LINK_LIBS} EXTRA_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/helloworld.pb.cc" "${CMAKE_CURRENT_BINARY_DIR}/helloworld.grpc.pb.cc") diff --git a/cpp/examples/parquet/parquet_stream_api/stream_reader_writer.cc b/cpp/examples/parquet/parquet_stream_api/stream_reader_writer.cc index 1f7246b78160c..7189aa853917c 100644 --- a/cpp/examples/parquet/parquet_stream_api/stream_reader_writer.cc +++ b/cpp/examples/parquet/parquet_stream_api/stream_reader_writer.cc @@ -25,6 +25,7 @@ #include #include "arrow/io/file.h" +#include "arrow/util/config.h" #include "parquet/exception.h" #include "parquet/stream_reader.h" #include "parquet/stream_writer.h" diff --git a/cpp/gdb_arrow.py b/cpp/gdb_arrow.py index 6c3af1680bdae..e6180f2ff0eeb 100644 --- a/cpp/gdb_arrow.py +++ b/cpp/gdb_arrow.py @@ -956,10 +956,12 @@ def storage_type(self): def to_string(self): """ - The result of calling ToString(). + The result of calling ToString(show_metadata=True). """ + # XXX `show_metadata` is an optional argument, but gdb doesn't allow + # omitting it. return StdString(gdb.parse_and_eval( - f"{for_evaluation(self.val)}.ToString()")) + f"{for_evaluation(self.val)}.ToString(true)")) class Schema: diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index c1fafeebc035d..0bf55e38f92b5 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -26,6 +26,214 @@ add_dependencies(arrow-all arrow-benchmarks arrow-integration) +# Libraries to link with libarrow.so. They aren't exported. +set(ARROW_SHARED_PRIVATE_LINK_LIBS) + +# Libraries to link with exported libarrow.{so,a}. 
+set(ARROW_SHARED_INSTALL_INTERFACE_LIBS) +set(ARROW_STATIC_INSTALL_INTERFACE_LIBS) + +if(ARROW_GCS) + if(google_cloud_cpp_storage_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS google-cloud-cpp::storage) + endif() +endif() + +if(ARROW_USE_OPENSSL) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_OPENSSL_LIBS}) +endif() + +if(ARROW_WITH_BROTLI) + if(Brotli_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_BROTLI_LIBS}) + endif() +endif() + +if(ARROW_WITH_BZ2) + if(BZip2_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS BZip2::BZip2) + endif() +endif() + +if(ARROW_WITH_LZ4) + if(lz4_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) + endif() +endif() + +if(ARROW_WITH_SNAPPY) + if(Snappy_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${Snappy_TARGET}) + endif() +endif() + +if(ARROW_WITH_ZLIB) + if(ZLIB_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ZLIB::ZLIB) + endif() +endif() + +if(ARROW_WITH_ZSTD) + if(zstd_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_ZSTD_LIBZSTD}) + endif() +endif() + +if(ARROW_ORC) + if(ORC_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::orc) + endif() +endif() + +if(ARROW_USE_GLOG) + if(GLOG_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS glog::glog) + endif() +endif() + +if(ARROW_S3) + if(AWSSDK_SOURCE STREQUAL "SYSTEM") + list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + aws-cpp-sdk-identity-management + aws-cpp-sdk-sts + aws-cpp-sdk-cognito-identity + aws-cpp-sdk-s3 + aws-cpp-sdk-core) + elseif(AWSSDK_SOURCE STREQUAL "BUNDLED") + if(UNIX) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) + endif() + endif() +endif() + +if(ARROW_WITH_OPENTELEMETRY) + if(opentelemetry_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_OPENTELEMETRY_LIBS}) + endif() + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) +endif() + +if(ARROW_WITH_UTF8PROC) + if(utf8proc_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS utf8proc::utf8proc) + endif() +endif() + +if(ARROW_WITH_RE2) + if(re2_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS re2::re2) + endif() +endif() + +# This should be done after if(ARROW_ORC) and if(ARROW_WITH_OPENTELEMETRY) +# because they depend on Protobuf. 
+if(ARROW_WITH_PROTOBUF) + if(Protobuf_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF}) + endif() +endif() + +if(ARROW_ENABLE_THREADING) + list(APPEND ARROW_SHARED_PRIVATE_LINK_LIBS Threads::Threads) + list(APPEND ARROW_STATIC_LINK_LIBS Threads::Threads) +endif() + +if(NOT MSVC_TOOLCHAIN) + list(APPEND ARROW_SHARED_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS}) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS}) +endif() + +set(ARROW_TEST_LINK_TOOLCHAIN arrow::flatbuffers ${ARROW_GTEST_GMOCK} + ${ARROW_GTEST_GTEST_MAIN}) +set(ARROW_TEST_STATIC_LINK_LIBS arrow_testing_static arrow_static + ${ARROW_TEST_LINK_TOOLCHAIN}) +set(ARROW_TEST_SHARED_LINK_LIBS arrow_testing_shared arrow_shared + ${ARROW_TEST_LINK_TOOLCHAIN}) +if(NOT MSVC) + list(APPEND ARROW_TEST_SHARED_LINK_LIBS ${CMAKE_DL_LIBS}) +endif() + +# ---------------------------------------------------------------------- +# Handle platform-related libraries like -pthread + +set(ARROW_SYSTEM_LINK_LIBS) + +if(CMAKE_THREAD_LIBS_INIT) + string(APPEND ARROW_PC_LIBS_PRIVATE " ${CMAKE_THREAD_LIBS_INIT}") +endif() + +if(WIN32) + list(APPEND ARROW_SYSTEM_LINK_LIBS "ws2_32.dll") +endif() + +if(NOT WIN32 AND NOT APPLE) + # Pass -lrt on Linux only + list(APPEND ARROW_SYSTEM_LINK_LIBS rt) +endif() + +list(APPEND ARROW_SHARED_INSTALL_INTERFACE_LIBS ${ARROW_SYSTEM_LINK_LIBS}) +list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_SYSTEM_LINK_LIBS}) + +# Need -latomic on Raspbian. +# See also: https://issues.apache.org/jira/browse/ARROW-12860 +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") + string(APPEND ARROW_PC_LIBS_PRIVATE " -latomic") + list(APPEND ARROW_SHARED_INSTALL_INTERFACE_LIBS "atomic") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS "atomic") +endif() + +# This creates OBJECT libraries for arrow_shared/arrow_static. This is +# not intended to be used for other libraries such as +# arrow_acero_shared/arrow_acero_static for now. +# +# arrow_shared/arrow_static depends on many external libraries such as +# Zstandard and jemalloc. If we use bundled libraries, we can't start +# building arrow_shared/arrow_static until all bundled libraries are +# built. That prevents parallel build speedup. +# +# We can avoid the situation by creating small OBJECT libraries that +# depend only on needed external libraries. If an OBJECT library doesn't +# depend on any bundled libraries, it can be built before bundled +# libraries are built. If an OBJECT library depends on only a few +# bundled libraries, it can be built after only they are built.
+function(arrow_add_object_library PREFIX) + set(SOURCES ${ARGN}) + string(TOLOWER "${PREFIX}" prefix) + if(WIN32) + add_library(${prefix}_shared OBJECT ${SOURCES}) + add_library(${prefix}_static OBJECT ${SOURCES}) + set_target_properties(${prefix}_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties(${prefix}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING) + target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC) + target_compile_features(${prefix}_shared PRIVATE cxx_std_17) + target_compile_features(${prefix}_static PRIVATE cxx_std_17) + set(${PREFIX}_TARGET_SHARED + ${prefix}_shared + PARENT_SCOPE) + set(${PREFIX}_TARGET_STATIC + ${prefix}_static + PARENT_SCOPE) + set(${PREFIX}_TARGETS + ${prefix}_shared ${prefix}_static + PARENT_SCOPE) + else() + add_library(${prefix} OBJECT ${SOURCES}) + set_target_properties(${prefix} PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_features(${prefix} PRIVATE cxx_std_17) + set(${PREFIX}_TARGET_SHARED + ${prefix} + PARENT_SCOPE) + set(${PREFIX}_TARGET_STATIC + ${prefix} + PARENT_SCOPE) + set(${PREFIX}_TARGETS + ${prefix} + PARENT_SCOPE) + endif() +endfunction() + # Adding unit tests part of the "arrow" portion of the test suite function(ADD_ARROW_TEST REL_TEST_NAME) set(options) @@ -119,53 +327,32 @@ function(ADD_ARROW_BENCHMARK REL_TEST_NAME) ${ARG_UNPARSED_ARGUMENTS}) endfunction() -macro(append_runtime_avx2_src SRC) +macro(append_runtime_avx2_src SRCS SRC) if(ARROW_HAVE_RUNTIME_AVX2) - list(APPEND ARROW_SRCS ${SRC}) + list(APPEND ${SRCS} ${SRC}) set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON) set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS ${ARROW_AVX2_FLAG}) endif() endmacro() -macro(append_runtime_avx2_bmi2_src SRC) +macro(append_runtime_avx2_bmi2_src SRCS SRC) if(ARROW_HAVE_RUNTIME_AVX2 AND ARROW_HAVE_RUNTIME_BMI2) - list(APPEND ARROW_SRCS ${SRC}) + list(APPEND ${SRCS} ${SRC}) set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON) set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS "${ARROW_AVX2_FLAG} ${ARROW_BMI2_FLAG}") endif() endmacro() -macro(append_runtime_avx512_src SRC) +macro(append_runtime_avx512_src SRCS SRC) if(ARROW_HAVE_RUNTIME_AVX512) - list(APPEND ARROW_SRCS ${SRC}) + list(APPEND ${SRCS} ${SRC}) set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON) set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS ${ARROW_AVX512_FLAG}) endif() endmacro() set(ARROW_SRCS - array/array_base.cc - array/array_binary.cc - array/array_decimal.cc - array/array_dict.cc - array/array_nested.cc - array/array_primitive.cc - array/array_run_end.cc - array/builder_adaptive.cc - array/builder_base.cc - array/builder_binary.cc - array/builder_decimal.cc - array/builder_dict.cc - array/builder_run_end.cc - array/builder_nested.cc - array/builder_primitive.cc - array/builder_union.cc - array/concatenate.cc - array/data.cc - array/diff.cc - array/util.cc - array/validate.cc builder.cc buffer.cc chunked_array.cc @@ -175,7 +362,6 @@ set(ARROW_SRCS datum.cc device.cc extension_type.cc - memory_pool.cc pretty_print.cc record_batch.cc result.cc @@ -192,18 +378,109 @@ set(ARROW_SRCS type_traits.cc visitor.cc c/bridge.cc - c/dlpack.cc - io/buffered.cc - io/caching.cc - io/compressed.cc - io/file.cc - io/hdfs.cc - io/hdfs_internal.cc - io/interfaces.cc - io/memory.cc - io/slow.cc - io/stdio.cc - io/transform.cc + c/dlpack.cc) + +arrow_add_object_library(ARROW_ARRAY + 
array/array_base.cc + array/array_binary.cc + array/array_decimal.cc + array/array_dict.cc + array/array_nested.cc + array/array_primitive.cc + array/array_run_end.cc + array/builder_adaptive.cc + array/builder_base.cc + array/builder_binary.cc + array/builder_decimal.cc + array/builder_dict.cc + array/builder_run_end.cc + array/builder_nested.cc + array/builder_primitive.cc + array/builder_union.cc + array/concatenate.cc + array/data.cc + array/diff.cc + array/util.cc + array/validate.cc) + +arrow_add_object_library(ARROW_IO + io/buffered.cc + io/caching.cc + io/compressed.cc + io/file.cc + io/hdfs.cc + io/hdfs_internal.cc + io/interfaces.cc + io/memory.cc + io/slow.cc + io/stdio.cc + io/transform.cc) +foreach(ARROW_IO_TARGET ${ARROW_IO_TARGETS}) + target_link_libraries(${ARROW_IO_TARGET} PRIVATE arrow::hadoop) + if(NOT MSVC) + target_link_libraries(${ARROW_IO_TARGET} PRIVATE ${CMAKE_DL_LIBS}) + endif() +endforeach() + +set(ARROW_MEMORY_POOL_SRCS memory_pool.cc) +if(ARROW_JEMALLOC) + list(APPEND ARROW_MEMORY_POOL_SRCS memory_pool_jemalloc.cc) + set_source_files_properties(memory_pool_jemalloc.cc + PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) +endif() +arrow_add_object_library(ARROW_MEMORY_POOL ${ARROW_MEMORY_POOL_SRCS}) +if(ARROW_JEMALLOC) + foreach(ARROW_MEMORY_POOL_TARGET ${ARROW_MEMORY_POOL_TARGETS}) + target_link_libraries(${ARROW_MEMORY_POOL_TARGET} PRIVATE jemalloc::jemalloc) + endforeach() +endif() +if(ARROW_MIMALLOC) + foreach(ARROW_MEMORY_POOL_TARGET ${ARROW_MEMORY_POOL_TARGETS}) + target_link_libraries(${ARROW_MEMORY_POOL_TARGET} PRIVATE mimalloc::mimalloc) + endforeach() +endif() + +set(ARROW_VENDORED_SRCS + vendored/base64.cpp + vendored/datetime/tz.cpp + vendored/double-conversion/bignum-dtoa.cc + vendored/double-conversion/bignum.cc + vendored/double-conversion/cached-powers.cc + vendored/double-conversion/double-to-string.cc + vendored/double-conversion/fast-dtoa.cc + vendored/double-conversion/fixed-dtoa.cc + vendored/double-conversion/string-to-double.cc + vendored/double-conversion/strtod.cc + vendored/musl/strptime.c + vendored/uriparser/UriCommon.c + vendored/uriparser/UriCompare.c + vendored/uriparser/UriEscape.c + vendored/uriparser/UriFile.c + vendored/uriparser/UriIp4.c + vendored/uriparser/UriIp4Base.c + vendored/uriparser/UriMemory.c + vendored/uriparser/UriNormalize.c + vendored/uriparser/UriNormalizeBase.c + vendored/uriparser/UriParse.c + vendored/uriparser/UriParseBase.c + vendored/uriparser/UriQuery.c + vendored/uriparser/UriRecompose.c + vendored/uriparser/UriResolve.c + vendored/uriparser/UriShorten.c) +if(APPLE) + list(APPEND ARROW_VENDORED_SRCS vendored/datetime/ios.mm) +endif() +set_source_files_properties(vendored/datetime/tz.cpp + PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) +arrow_add_object_library(ARROW_VENDORED ${ARROW_VENDORED_SRCS}) +# Disable DLL exports in vendored uriparser library +foreach(ARROW_VENDORED_TARGET ${ARROW_VENDORED_TARGETS}) + target_compile_definitions(${ARROW_VENDORED_TARGET} PUBLIC URI_STATIC_BUILD) +endforeach() + +set(ARROW_UTIL_SRCS util/align_util.cc util/async_util.cc util/atfork_internal.cc @@ -249,98 +526,108 @@ set(ARROW_SRCS util/unreachable.cc util/uri.cc util/utf8.cc - util/value_parsing.cc - vendored/base64.cpp - vendored/datetime/tz.cpp - vendored/double-conversion/bignum.cc - vendored/double-conversion/bignum-dtoa.cc - vendored/double-conversion/cached-powers.cc - vendored/double-conversion/double-to-string.cc - vendored/double-conversion/fast-dtoa.cc - 
vendored/double-conversion/fixed-dtoa.cc - vendored/double-conversion/string-to-double.cc - vendored/double-conversion/strtod.cc) - -if(ARROW_JEMALLOC) - list(APPEND ARROW_SRCS memory_pool_jemalloc.cc) - set_source_files_properties(memory_pool_jemalloc.cc - PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON) -endif() - -append_runtime_avx2_src(util/bpacking_avx2.cc) -append_runtime_avx512_src(util/bpacking_avx512.cc) + util/value_parsing.cc) +append_runtime_avx2_src(ARROW_UTIL_SRCS util/bpacking_avx2.cc) +append_runtime_avx512_src(ARROW_UTIL_SRCS util/bpacking_avx512.cc) if(ARROW_HAVE_NEON) - list(APPEND ARROW_SRCS util/bpacking_neon.cc) + list(APPEND ARROW_UTIL_SRCS util/bpacking_neon.cc) endif() -if(APPLE) - list(APPEND ARROW_SRCS vendored/datetime/ios.mm) -endif() - -set(ARROW_C_SRCS - vendored/musl/strptime.c - vendored/uriparser/UriCommon.c - vendored/uriparser/UriCompare.c - vendored/uriparser/UriEscape.c - vendored/uriparser/UriFile.c - vendored/uriparser/UriIp4Base.c - vendored/uriparser/UriIp4.c - vendored/uriparser/UriMemory.c - vendored/uriparser/UriNormalizeBase.c - vendored/uriparser/UriNormalize.c - vendored/uriparser/UriParseBase.c - vendored/uriparser/UriParse.c - vendored/uriparser/UriQuery.c - vendored/uriparser/UriRecompose.c - vendored/uriparser/UriResolve.c - vendored/uriparser/UriShorten.c) - -set_source_files_properties(vendored/datetime/tz.cpp - PROPERTIES SKIP_PRECOMPILE_HEADERS ON - SKIP_UNITY_BUILD_INCLUSION ON) - -# Disable DLL exports in vendored uriparser library -add_definitions(-DURI_STATIC_BUILD) - if(ARROW_WITH_BROTLI) - add_definitions(-DARROW_WITH_BROTLI) - list(APPEND ARROW_SRCS util/compression_brotli.cc) + list(APPEND ARROW_UTIL_SRCS util/compression_brotli.cc) endif() - if(ARROW_WITH_BZ2) - add_definitions(-DARROW_WITH_BZ2) - list(APPEND ARROW_SRCS util/compression_bz2.cc) + list(APPEND ARROW_UTIL_SRCS util/compression_bz2.cc) endif() - if(ARROW_WITH_LZ4) - add_definitions(-DARROW_WITH_LZ4) - list(APPEND ARROW_SRCS util/compression_lz4.cc) + list(APPEND ARROW_UTIL_SRCS util/compression_lz4.cc) endif() - if(ARROW_WITH_OPENTELEMETRY) - list(APPEND ARROW_SRCS util/tracing_internal.cc) + list(APPEND ARROW_UTIL_SRCS util/tracing_internal.cc) endif() - if(ARROW_WITH_SNAPPY) - add_definitions(-DARROW_WITH_SNAPPY) - list(APPEND ARROW_SRCS util/compression_snappy.cc) + list(APPEND ARROW_UTIL_SRCS util/compression_snappy.cc) endif() - if(ARROW_WITH_ZLIB) - add_definitions(-DARROW_WITH_ZLIB) - list(APPEND ARROW_SRCS util/compression_zlib.cc) + list(APPEND ARROW_UTIL_SRCS util/compression_zlib.cc) endif() +if(ARROW_WITH_ZSTD) + list(APPEND ARROW_UTIL_SRCS util/compression_zstd.cc) +endif() + +arrow_add_object_library(ARROW_UTIL ${ARROW_UTIL_SRCS}) +if(ARROW_USE_BOOST) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE Boost::headers) + endforeach() +endif() +if(ARROW_USE_GLOG) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE glog::glog) + endforeach() +endif() +if(ARROW_USE_XSIMD) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_XSIMD}) + endforeach() +endif() +if(ARROW_WITH_BROTLI) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_BROTLI_LIBS}) + endforeach() +endif() +if(ARROW_WITH_BZ2) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE BZip2::BZip2) + endforeach() +endif() 
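Because arrow_add_object_library() defined above yields two object-library targets on Windows (one compiled with ARROW_EXPORTING for the shared flavour, one with ARROW_STATIC) and a single position-independent target elsewhere, optional dependencies are attached by iterating the ${PREFIX}_TARGETS list rather than by naming a single target. A minimal sketch of the convention; ARROW_FOO, ARROW_WITH_SOMEDEP and SomeDep::somedep are hypothetical names used only for illustration:

    arrow_add_object_library(ARROW_FOO foo/bar.cc)
    if(ARROW_WITH_SOMEDEP)
      foreach(ARROW_FOO_TARGET ${ARROW_FOO_TARGETS})
        # "Linking" an OBJECT library produces no binary here; it applies
        # SomeDep's include paths and compile definitions when compiling
        # foo/bar.cc and carries the dependency along to the final arrow
        # libraries that absorb these objects.
        target_link_libraries(${ARROW_FOO_TARGET} PRIVATE SomeDep::somedep)
      endforeach()
    endif()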
+if(ARROW_WITH_LZ4) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE LZ4::lz4) + endforeach() +endif() +if(ARROW_WITH_SNAPPY) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${Snappy_TARGET}) + endforeach() +endif() +if(ARROW_WITH_OPENTELEMETRY) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) + endforeach() +endif() +if(ARROW_WITH_ZLIB) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ZLIB::ZLIB) + endforeach() +endif() if(ARROW_WITH_ZSTD) - add_definitions(-DARROW_WITH_ZSTD) - list(APPEND ARROW_SRCS util/compression_zstd.cc) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_ZSTD_LIBZSTD}) + endforeach() endif() -set(ARROW_TESTING_SHARED_LINK_LIBS arrow::flatbuffers rapidjson::rapidjson arrow_shared - ${ARROW_GTEST_GTEST}) -set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers rapidjson::rapidjson arrow_static +set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST}) +set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON) +set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static ${ARROW_GTEST_GTEST}) +set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) +set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) +# that depend on gtest +if(GTest_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS ${ARROW_GTEST_GTEST}) + list(APPEND ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_GTEST_GTEST}) +else() + list(APPEND ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS ArrowTesting::gtest) + list(APPEND ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS ArrowTesting::gtest) +endif() +if(WIN32) + list(APPEND ARROW_TESTING_SHARED_LINK_LIBS "ws2_32.dll") + list(APPEND ARROW_TESTING_STATIC_LINK_LIBS "ws2_32.dll") + list(APPEND ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS "ws2_32.dll") +endif() set(ARROW_TESTING_SRCS io/test_common.cc @@ -350,115 +637,92 @@ set(ARROW_TESTING_SRCS testing/generator.cc testing/util.cc) -# Add dependencies for third-party allocators. 
-# If possible we only want memory_pool.cc to wait for allocators to finish building, -# but that only works with Ninja -# (see https://gitlab.kitware.com/cmake/cmake/issues/19677) - -set(_allocator_dependencies "") # Empty list -if(jemalloc_VENDORED) - list(APPEND _allocator_dependencies jemalloc_ep) -endif() -if(mimalloc_VENDORED) - list(APPEND _allocator_dependencies mimalloc_ep) -endif() - -if(_allocator_dependencies) - if("${CMAKE_GENERATOR}" STREQUAL "Ninja") - set_source_files_properties(memory_pool.cc PROPERTIES OBJECT_DEPENDS - "${_allocator_dependencies}") - else() - add_dependencies(arrow_dependencies ${_allocator_dependencies}) - endif() - set_source_files_properties(memory_pool.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON - SKIP_UNITY_BUILD_INCLUSION ON) -endif() - -unset(_allocator_dependencies) - -if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set_property(SOURCE util/io_util.cc - APPEND_STRING - PROPERTY COMPILE_FLAGS " -Wno-unused-macros ") -endif() - # # Configure the base Arrow libraries # if(ARROW_BUILD_INTEGRATION OR ARROW_BUILD_TESTS) - list(APPEND - ARROW_SRCS - integration/c_data_integration_internal.cc - integration/json_integration.cc - integration/json_internal.cc) + arrow_add_object_library(ARROW_INTEGRATION integration/c_data_integration_internal.cc + integration/json_integration.cc integration/json_internal.cc) + foreach(ARROW_INTEGRATION_TARGET ${ARROW_INTEGRATION_TARGETS}) + target_link_libraries(${ARROW_INTEGRATION_TARGET} PRIVATE RapidJSON) + endforeach() +else() + set(ARROW_INTEGRATION_TARGET_SHARED) + set(ARROW_INTEGRATION_TARGET_STATIC) endif() if(ARROW_CSV) - list(APPEND - ARROW_SRCS - csv/converter.cc - csv/chunker.cc - csv/column_builder.cc - csv/column_decoder.cc - csv/options.cc - csv/parser.cc - csv/reader.cc - csv/writer.cc) + arrow_add_object_library(ARROW_CSV + csv/converter.cc + csv/chunker.cc + csv/column_builder.cc + csv/column_decoder.cc + csv/options.cc + csv/parser.cc + csv/reader.cc + csv/writer.cc) + if(ARROW_USE_XSIMD) + foreach(ARROW_CSV_TARGET ${ARROW_CSV_TARGETS}) + target_link_libraries(${ARROW_CSV_TARGET} PRIVATE ${ARROW_XSIMD}) + endforeach() + endif() list(APPEND ARROW_TESTING_SRCS csv/test_common.cc) +else() + set(ARROW_CSV_TARGET_SHARED) + set(ARROW_CSV_TARGET_STATIC) endif() # Baseline Compute functionality + scalar casts and a few select kernels -list(APPEND - ARROW_SRCS - compute/api_aggregate.cc - compute/api_scalar.cc - compute/api_vector.cc - compute/cast.cc - compute/exec.cc - compute/expression.cc - compute/function.cc - compute/function_internal.cc - compute/kernel.cc - compute/key_hash.cc - compute/key_map.cc - compute/light_array.cc - compute/ordering.cc - compute/registry.cc - compute/kernels/codegen_internal.cc - compute/kernels/row_encoder.cc - compute/kernels/ree_util_internal.cc - compute/kernels/scalar_cast_boolean.cc - compute/kernels/scalar_cast_dictionary.cc - compute/kernels/scalar_cast_extension.cc - compute/kernels/scalar_cast_internal.cc - compute/kernels/scalar_cast_nested.cc - compute/kernels/scalar_cast_numeric.cc - compute/kernels/scalar_cast_string.cc - compute/kernels/scalar_cast_temporal.cc - compute/kernels/util_internal.cc - compute/kernels/vector_hash.cc - compute/kernels/vector_selection.cc - compute/kernels/vector_selection_filter_internal.cc - compute/kernels/vector_selection_internal.cc - compute/kernels/vector_selection_take_internal.cc - compute/row/encode_internal.cc - compute/row/compare_internal.cc - compute/row/grouper.cc - 
compute/row/row_internal.cc - compute/util.cc) - -append_runtime_avx2_src(compute/key_hash_avx2.cc) -append_runtime_avx2_bmi2_src(compute/key_map_avx2.cc) -append_runtime_avx2_src(compute/row/compare_internal_avx2.cc) -append_runtime_avx2_src(compute/row/encode_internal_avx2.cc) -append_runtime_avx2_bmi2_src(compute/util_avx2.cc) +set(ARROW_COMPUTE_SRCS + compute/api_aggregate.cc + compute/api_scalar.cc + compute/api_vector.cc + compute/cast.cc + compute/exec.cc + compute/expression.cc + compute/function.cc + compute/function_internal.cc + compute/kernel.cc + compute/key_hash.cc + compute/key_map.cc + compute/light_array.cc + compute/ordering.cc + compute/registry.cc + compute/kernels/codegen_internal.cc + compute/kernels/row_encoder.cc + compute/kernels/ree_util_internal.cc + compute/kernels/scalar_cast_boolean.cc + compute/kernels/scalar_cast_dictionary.cc + compute/kernels/scalar_cast_extension.cc + compute/kernels/scalar_cast_internal.cc + compute/kernels/scalar_cast_nested.cc + compute/kernels/scalar_cast_numeric.cc + compute/kernels/scalar_cast_string.cc + compute/kernels/scalar_cast_temporal.cc + compute/kernels/util_internal.cc + compute/kernels/vector_hash.cc + compute/kernels/vector_selection.cc + compute/kernels/vector_selection_filter_internal.cc + compute/kernels/vector_selection_internal.cc + compute/kernels/vector_selection_take_internal.cc + compute/row/encode_internal.cc + compute/row/compare_internal.cc + compute/row/grouper.cc + compute/row/row_internal.cc + compute/util.cc) + +append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_avx2.cc) +append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_avx2.cc) +append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc) +append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc) +append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc) if(ARROW_COMPUTE) # Include the remaining kernels list(APPEND - ARROW_SRCS + ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic.cc compute/kernels/aggregate_mode.cc compute/kernels/aggregate_quantile.cc @@ -488,53 +752,101 @@ if(ARROW_COMPUTE) compute/kernels/vector_select_k.cc compute/kernels/vector_sort.cc) - append_runtime_avx2_src(compute/kernels/aggregate_basic_avx2.cc) - append_runtime_avx512_src(compute/kernels/aggregate_basic_avx512.cc) + append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx2.cc) + append_runtime_avx512_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx512.cc) endif() -if(ARROW_FILESYSTEM) - if(ARROW_HDFS) - add_definitions(-DARROW_HDFS) - endif() +arrow_add_object_library(ARROW_COMPUTE ${ARROW_COMPUTE_SRCS}) +if(ARROW_USE_BOOST) + foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE Boost::headers) + endforeach() +endif() +if(ARROW_USE_XSIMD) + foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE ${ARROW_XSIMD}) + endforeach() +endif() +if(ARROW_WITH_OPENTELEMETRY) + foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) + endforeach() +endif() +if(ARROW_WITH_RE2) + foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE re2::re2) + endforeach() +endif() +if(ARROW_WITH_UTF8PROC) + foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE utf8proc::utf8proc) + 
endforeach() +endif() - list(APPEND - ARROW_SRCS - filesystem/filesystem.cc - filesystem/localfs.cc - filesystem/mockfs.cc - filesystem/path_util.cc - filesystem/util_internal.cc) +if(ARROW_FILESYSTEM) + set(ARROW_FILESYSTEM_SRCS + filesystem/filesystem.cc + filesystem/localfs.cc + filesystem/mockfs.cc + filesystem/path_util.cc + filesystem/util_internal.cc) if(ARROW_AZURE) - list(APPEND ARROW_SRCS filesystem/azurefs.cc) + list(APPEND ARROW_FILESYSTEM_SRCS filesystem/azurefs.cc) set_source_files_properties(filesystem/azurefs.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) endif() if(ARROW_GCS) - list(APPEND ARROW_SRCS filesystem/gcsfs.cc filesystem/gcsfs_internal.cc) + list(APPEND ARROW_FILESYSTEM_SRCS filesystem/gcsfs.cc filesystem/gcsfs_internal.cc) set_source_files_properties(filesystem/gcsfs.cc filesystem/gcsfs_internal.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) endif() if(ARROW_HDFS) - list(APPEND ARROW_SRCS filesystem/hdfs.cc) + list(APPEND ARROW_FILESYSTEM_SRCS filesystem/hdfs.cc) endif() if(ARROW_S3) - list(APPEND ARROW_SRCS filesystem/s3fs.cc) + list(APPEND ARROW_FILESYSTEM_SRCS filesystem/s3fs.cc) set_source_files_properties(filesystem/s3fs.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) endif() + arrow_add_object_library(ARROW_FILESYSTEM ${ARROW_FILESYSTEM_SRCS}) + if(ARROW_AZURE) + foreach(ARROW_FILESYSTEM_TARGET ${ARROW_FILESYSTEM_TARGETS}) + target_link_libraries(${ARROW_FILESYSTEM_TARGET} + PRIVATE ${AZURE_SDK_LINK_LIBRARIES}) + endforeach() + endif() + if(ARROW_GCS) + foreach(ARROW_FILESYSTEM_TARGET ${ARROW_FILESYSTEM_TARGETS}) + target_link_libraries(${ARROW_FILESYSTEM_TARGET} PRIVATE google-cloud-cpp::storage) + endforeach() + endif() + if(ARROW_HDFS) + foreach(ARROW_FILESYSTEM_TARGET ${ARROW_FILESYSTEM_TARGETS}) + target_link_libraries(${ARROW_FILESYSTEM_TARGET} PRIVATE arrow::hadoop) + endforeach() + endif() + if(ARROW_S3) + foreach(ARROW_FILESYSTEM_TARGET ${ARROW_FILESYSTEM_TARGETS}) + target_link_libraries(${ARROW_FILESYSTEM_TARGET} PRIVATE ${AWSSDK_LINK_LIBRARIES}) + endforeach() + endif() + list(APPEND ARROW_TESTING_SHARED_LINK_LIBS ${ARROW_GTEST_GMOCK}) list(APPEND ARROW_TESTING_STATIC_LINK_LIBS ${ARROW_GTEST_GMOCK}) list(APPEND ARROW_TESTING_SRCS filesystem/test_util.cc) +else() + set(ARROW_FILESYSTEM_TARGET_SHARED) + set(ARROW_FILESYSTEM_TARGET_STATIC) endif() if(ARROW_IPC) list(APPEND - ARROW_SRCS + ARROW_IPC_SRCS ipc/dictionary.cc ipc/feather.cc ipc/message.cc @@ -542,32 +854,51 @@ if(ARROW_IPC) ipc/options.cc ipc/reader.cc ipc/writer.cc) - if(ARROW_JSON) - list(APPEND ARROW_SRCS ipc/json_simple.cc) + list(APPEND ARROW_IPC_SRCS ipc/json_simple.cc) + endif() + arrow_add_object_library(ARROW_IPC ${ARROW_IPC_SRCS}) + foreach(ARROW_IPC_TARGET ${ARROW_IPC_TARGETS}) + target_link_libraries(${ARROW_IPC_TARGET} PRIVATE arrow::flatbuffers) + endforeach() + if(ARROW_JSON) + foreach(ARROW_IPC_TARGET ${ARROW_IPC_TARGETS}) + target_link_libraries(${ARROW_IPC_TARGET} PRIVATE RapidJSON) + endforeach() endif() +else() + set(ARROW_IPC_TARGET_SHARED) + set(ARROW_IPC_TARGET_STATIC) endif() if(ARROW_JSON) - list(APPEND - ARROW_SRCS - extension/fixed_shape_tensor.cc - json/options.cc - json/chunked_builder.cc - json/chunker.cc - json/converter.cc - json/object_parser.cc - json/object_writer.cc - json/parser.cc - json/reader.cc) + arrow_add_object_library(ARROW_JSON + extension/fixed_shape_tensor.cc + json/options.cc + json/chunked_builder.cc + json/chunker.cc + json/converter.cc + json/object_parser.cc + 
json/object_writer.cc + json/parser.cc + json/reader.cc) + foreach(ARROW_JSON_TARGET ${ARROW_JSON_TARGETS}) + target_link_libraries(${ARROW_JSON_TARGET} PRIVATE RapidJSON) + endforeach() +else() + set(ARROW_JSON_TARGET_SHARED) + set(ARROW_JSON_TARGET_STATIC) endif() if(ARROW_ORC) - list(APPEND - ARROW_SRCS - adapters/orc/adapter.cc - adapters/orc/options.cc - adapters/orc/util.cc) + arrow_add_object_library(ARROW_ORC adapters/orc/adapter.cc adapters/orc/options.cc + adapters/orc/util.cc) + foreach(ARROW_ORC_TARGET ${ARROW_ORC_TARGETS}) + target_link_libraries(${ARROW_ORC_TARGET} PRIVATE orc::orc) + endforeach() +else() + set(ARROW_ORC_TARGET_SHARED) + set(ARROW_ORC_TARGET_STATIC) endif() if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT) @@ -576,8 +907,6 @@ if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT) set(ARROW_SHARED_LINK_FLAGS ${ARROW_VERSION_SCRIPT_FLAGS}) endif() -set(ARROW_ALL_SRCS ${ARROW_SRCS} ${ARROW_C_SRCS}) - if(ARROW_BUILD_STATIC AND ARROW_BUNDLED_STATIC_LIBS) set(ARROW_BUILD_BUNDLED_DEPENDENCIES TRUE) else() @@ -620,14 +949,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") endif() endif() -# Need -latomic on Raspbian. -# See also: https://issues.apache.org/jira/browse/ARROW-12860 -if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") - string(APPEND ARROW_PC_LIBS_PRIVATE " -latomic") - list(APPEND ARROW_SHARED_INSTALL_INTERFACE_LIBS "atomic") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS "atomic") -endif() - # If libarrow.a is only built, "pkg-config --cflags --libs arrow" # outputs build flags for static linking not shared # linking. ARROW_PC_* except ARROW_PC_*_PRIVATE are for the static @@ -653,21 +974,42 @@ add_arrow_lib(arrow PKG_CONFIG_NAME arrow SOURCES - ${ARROW_ALL_SRCS} + ${ARROW_SRCS} OUTPUTS ARROW_LIBRARIES PRECOMPILED_HEADERS "$<$:arrow/pch.h>" - DEPENDENCIES - arrow_dependencies SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS} - SHARED_LINK_LIBS - ${ARROW_SHARED_LINK_LIBS} SHARED_PRIVATE_LINK_LIBS + ${ARROW_ARRAY_TARGET_SHARED} + ${ARROW_COMPUTE_TARGET_SHARED} + ${ARROW_CSV_TARGET_SHARED} + ${ARROW_FILESYSTEM_TARGET_SHARED} + ${ARROW_INTEGRATION_TARGET_SHARED} + ${ARROW_IO_TARGET_SHARED} + ${ARROW_IPC_TARGET_SHARED} + ${ARROW_JSON_TARGET_SHARED} + ${ARROW_MEMORY_POOL_TARGET_SHARED} + ${ARROW_ORC_TARGET_SHARED} + ${ARROW_UTIL_TARGET_SHARED} + ${ARROW_VENDORED_TARGET_SHARED} ${ARROW_SHARED_PRIVATE_LINK_LIBS} + ${ARROW_SYSTEM_LINK_LIBS} STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} + ${ARROW_ARRAY_TARGET_STATIC} + ${ARROW_COMPUTE_TARGET_STATIC} + ${ARROW_CSV_TARGET_STATIC} + ${ARROW_FILESYSTEM_TARGET_STATIC} + ${ARROW_INTEGRATION_TARGET_STATIC} + ${ARROW_IO_TARGET_STATIC} + ${ARROW_IPC_TARGET_STATIC} + ${ARROW_JSON_TARGET_STATIC} + ${ARROW_MEMORY_POOL_TARGET_STATIC} + ${ARROW_ORC_TARGET_STATIC} + ${ARROW_UTIL_TARGET_STATIC} + ${ARROW_VENDORED_TARGET_STATIC} + ${ARROW_SYSTEM_LINK_LIBS} STATIC_INSTALL_INTERFACE_LIBS ${ARROW_STATIC_INSTALL_INTERFACE_LIBS} SHARED_INSTALL_INTERFACE_LIBS @@ -730,16 +1072,6 @@ if(ARROW_WITH_BACKTRACE) endif() if(ARROW_TESTING) - # that depend on gtest - set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) - set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) - if(GTest_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS ${ARROW_GTEST_GTEST}) - list(APPEND ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_GTEST_GTEST}) - else() - list(APPEND ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS ArrowTesting::gtest) - list(APPEND ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS 
ArrowTesting::gtest) - endif() add_arrow_lib(arrow_testing CMAKE_PACKAGE_NAME ArrowTesting @@ -751,10 +1083,10 @@ if(ARROW_TESTING) ARROW_TESTING_LIBRARIES PRECOMPILED_HEADERS "$<$:arrow/pch.h>" - DEPENDENCIES - arrow_test_dependencies SHARED_LINK_LIBS ${ARROW_TESTING_SHARED_LINK_LIBS} + SHARED_PRIVATE_LINK_LIBS + ${ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS} SHARED_INSTALL_INTERFACE_LIBS ${ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS} STATIC_LINK_LIBS @@ -817,7 +1149,6 @@ add_arrow_test(misc_test status_test.cc) add_arrow_test(public_api_test) - set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index b77d52a23eedb..31ed4a6a69b6a 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ b/cpp/src/arrow/acero/CMakeLists.txt @@ -62,26 +62,19 @@ append_acero_runtime_avx2_src(bloom_filter_avx2.cc) append_acero_runtime_avx2_src(swiss_join_avx2.cc) set(ARROW_ACERO_SHARED_LINK_LIBS) +set(ARROW_ACERO_SHARED_PRIVATE_LINK_LIBS) set(ARROW_ACERO_STATIC_LINK_LIBS) set(ARROW_ACERO_STATIC_INSTALL_INTERFACE_LIBS) set(ARROW_ACERO_SHARED_INSTALL_INTERFACE_LIBS) if(ARROW_WITH_OPENTELEMETRY) - list(APPEND - ARROW_ACERO_SHARED_LINK_LIBS - opentelemetry-cpp::trace - opentelemetry-cpp::ostream_span_exporter - opentelemetry-cpp::otlp_http_exporter) - list(APPEND - ARROW_ACERO_STATIC_LINK_LIBS - opentelemetry-cpp::trace - opentelemetry-cpp::ostream_span_exporter - opentelemetry-cpp::otlp_http_exporter) + list(APPEND ARROW_ACERO_SHARED_PRIVATE_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) + list(APPEND ARROW_ACERO_STATIC_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) endif() list(APPEND ARROW_ACERO_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) list(APPEND ARROW_ACERO_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) -list(APPEND ARROW_ACERO_STATIC_LINK_LIBS arrow_static ${ARROW_STATIC_LINK_LIBS}) +list(APPEND ARROW_ACERO_STATIC_LINK_LIBS arrow_static) list(APPEND ARROW_ACERO_SHARED_LINK_LIBS arrow_shared) add_arrow_lib(arrow_acero @@ -95,10 +88,10 @@ add_arrow_lib(arrow_acero ${ARROW_ACERO_SRCS} PRECOMPILED_HEADERS "$<$:arrow/acero/pch.h>" - DEPENDENCIES - toolchain SHARED_LINK_LIBS ${ARROW_ACERO_SHARED_LINK_LIBS} + SHARED_PRIVATE_LINK_LIBS + ${ARROW_ACERO_SHARED_PRIVATE_LINK_LIBS} SHARED_INSTALL_INTERFACE_LIBS ${ARROW_ACERO_SHARED_INSTALL_INTERFACE_LIBS} STATIC_LINK_LIBS @@ -122,12 +115,22 @@ endforeach() # Define arrow_acero_testing object library for common test files if(ARROW_TESTING) - add_library(arrow_acero_testing OBJECT test_util_internal.cc) + # test_nodes.cc isn't used by all tests but link to it for simple + # CMakeLists.txt. + add_library(arrow_acero_testing OBJECT test_nodes.cc test_util_internal.cc) # Even though this is still just an object library we still need to "link" our # dependencies so that include paths are configured correctly target_link_libraries(arrow_acero_testing PRIVATE ${ARROW_ACERO_TEST_LINK_LIBS}) + # Only for test_nodes.cc. + if(ARROW_WITH_OPENTELEMETRY) + target_link_libraries(arrow_acero_testing PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) + endif() list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_testing) endif() +# Only for hash_aggregate_test.cc. 
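# (Boost::headers is appended to ARROW_ACERO_TEST_LINK_LIBS only, so Boost
# remains a header-only, test-time dependency and is never linked into the
# arrow_acero library targets themselves.)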
+if(ARROW_USE_BOOST) + list(APPEND ARROW_ACERO_TEST_LINK_LIBS Boost::headers) +endif() # Adding unit tests part of the "dataset" portion of the test suite function(add_arrow_acero_test REL_TEST_NAME) @@ -162,26 +165,20 @@ function(add_arrow_acero_test REL_TEST_NAME) ${ARG_UNPARSED_ARGUMENTS}) endfunction() -add_arrow_acero_test(plan_test - SOURCES - plan_test.cc - test_nodes_test.cc - test_nodes.cc) -add_arrow_acero_test(source_node_test SOURCES source_node_test.cc test_nodes.cc) -add_arrow_acero_test(fetch_node_test SOURCES fetch_node_test.cc test_nodes.cc) -add_arrow_acero_test(order_by_node_test SOURCES order_by_node_test.cc test_nodes.cc) +add_arrow_acero_test(plan_test SOURCES plan_test.cc test_nodes_test.cc) +add_arrow_acero_test(source_node_test SOURCES source_node_test.cc) +add_arrow_acero_test(fetch_node_test SOURCES fetch_node_test.cc) +add_arrow_acero_test(order_by_node_test SOURCES order_by_node_test.cc) add_arrow_acero_test(hash_join_node_test SOURCES hash_join_node_test.cc bloom_filter_test.cc) -add_arrow_acero_test(pivot_longer_node_test SOURCES pivot_longer_node_test.cc - test_nodes.cc) +add_arrow_acero_test(pivot_longer_node_test SOURCES pivot_longer_node_test.cc) # asof_join_node and sorted_merge_node use std::thread internally # and doesn't use ThreadPool so it will # be broken if threading is turned off if(ARROW_ENABLE_THREADING) - add_arrow_acero_test(asof_join_node_test SOURCES asof_join_node_test.cc test_nodes.cc) - add_arrow_acero_test(sorted_merge_node_test SOURCES sorted_merge_node_test.cc - test_nodes.cc) + add_arrow_acero_test(asof_join_node_test SOURCES asof_join_node_test.cc) + add_arrow_acero_test(sorted_merge_node_test SOURCES sorted_merge_node_test.cc) endif() add_arrow_acero_test(tpch_node_test SOURCES tpch_node_test.cc) diff --git a/cpp/src/arrow/acero/asof_join_benchmark.cc b/cpp/src/arrow/acero/asof_join_benchmark.cc index 600c230a3fc3f..02116b09fc1fd 100644 --- a/cpp/src/arrow/acero/asof_join_benchmark.cc +++ b/cpp/src/arrow/acero/asof_join_benchmark.cc @@ -131,9 +131,8 @@ static void AsOfJoinOverhead(benchmark::State& state) { // this generates the set of right hand tables to test on. 
void SetArgs(benchmark::internal::Benchmark* bench) { bench - ->ArgNames({"left_freq", "left_cols", "left_ids", "left_batch_size", - "num_right_tables", "right_freq", "right_cols", "right_ids", - "right_batch_size"}) + ->ArgNames({"left_freq", "left_cols", "left_ids", "batch_size", "num_right_tables", + "right_freq", "right_cols", "right_ids"}) ->UseRealTime(); int default_freq = 400; diff --git a/cpp/src/arrow/acero/query_context.cc b/cpp/src/arrow/acero/query_context.cc index 9f838508fcd05..a27397d12079d 100644 --- a/cpp/src/arrow/acero/query_context.cc +++ b/cpp/src/arrow/acero/query_context.cc @@ -53,7 +53,7 @@ size_t QueryContext::max_concurrency() const { return thread_indexer_.Capacity() Result QueryContext::GetTempStack(size_t thread_index) { if (!tld_[thread_index].is_init) { RETURN_NOT_OK(tld_[thread_index].stack.Init( - memory_pool(), 8 * util::MiniBatch::kMiniBatchLength * sizeof(uint64_t))); + memory_pool(), 32 * util::MiniBatch::kMiniBatchLength * sizeof(uint64_t))); tld_[thread_index].is_init = true; } return &tld_[thread_index].stack; diff --git a/cpp/src/arrow/adapters/orc/CMakeLists.txt b/cpp/src/arrow/adapters/orc/CMakeLists.txt index 4d66151cd38c9..14fb8e681d14b 100644 --- a/cpp/src/arrow/adapters/orc/CMakeLists.txt +++ b/cpp/src/arrow/adapters/orc/CMakeLists.txt @@ -35,6 +35,7 @@ add_arrow_test(adapter_test PREFIX "arrow-orc" STATIC_LINK_LIBS + orc::orc ${ARROW_ORC_STATIC_LINK_LIBS}) set_source_files_properties(adapter_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 022fce72f59b8..4ec79a73029b4 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -565,6 +565,9 @@ void ReleaseExportedArray(struct ArrowArray* array) { } struct ArrayExporter { + explicit ArrayExporter(bool device_interface = false) + : device_interface_(device_interface) {} + Status Export(const std::shared_ptr& data) { // Force computing null count. // This is because ARROW-9037 is in version 0.17 and 0.17.1, and they are @@ -586,8 +589,12 @@ struct ArrayExporter { export_.buffers_.resize(n_buffers); std::transform(buffers_begin, data->buffers.end(), export_.buffers_.begin(), - [](const std::shared_ptr& buffer) -> const void* { - return buffer ? buffer->data() : nullptr; + [this](const std::shared_ptr& buffer) -> const void* { + return buffer + ? (device_interface_ + ? 
reinterpret_cast(buffer->address()) + : buffer->data()) + : nullptr; }); if (need_variadic_buffer_sizes) { @@ -602,15 +609,16 @@ struct ArrayExporter { // Export dictionary if (data->dictionary != nullptr) { - dict_exporter_ = std::make_unique(); + dict_exporter_ = std::make_unique(device_interface_); RETURN_NOT_OK(dict_exporter_->Export(data->dictionary)); } // Export children export_.children_.resize(data->child_data.size()); - child_exporters_.resize(data->child_data.size()); - for (size_t i = 0; i < data->child_data.size(); ++i) { - RETURN_NOT_OK(child_exporters_[i].Export(data->child_data[i])); + child_exporters_.reserve(data->child_data.size()); + for (const auto& child : data->child_data) { + child_exporters_.emplace_back(ArrayExporter{device_interface_}); + RETURN_NOT_OK(child_exporters_.back().Export(child)); } // Store owning pointer to ArrayData @@ -662,6 +670,7 @@ struct ArrayExporter { ExportedArrayPrivateData export_; std::unique_ptr dict_exporter_; std::vector child_exporters_; + bool device_interface_ = false; }; } // namespace @@ -756,7 +765,7 @@ Status ExportDeviceArray(const Array& array, std::shared_ptr } out->device_id = device_info.second; - ArrayExporter exporter; + ArrayExporter exporter(/*device_interface*/ true); RETURN_NOT_OK(exporter.Export(array.data())); exporter.Finish(&out->array); @@ -794,7 +803,7 @@ Status ExportDeviceRecordBatch(const RecordBatch& batch, } out->device_id = device_info.second; - ArrayExporter exporter; + ArrayExporter exporter(/*device_interface*/ true); RETURN_NOT_OK(exporter.Export(array->data())); exporter.Finish(&out->array); @@ -1958,6 +1967,14 @@ Result> ImportRecordBatch(struct ArrowArray* array, return ImportRecordBatch(array, *maybe_schema); } +Result> DefaultDeviceMapper(ArrowDeviceType device_type, + int64_t device_id) { + if (device_type != ARROW_DEVICE_CPU) { + return Status::NotImplemented("Only importing data on CPU is supported"); + } + return default_cpu_memory_manager(); +} + Result> ImportDeviceArray(struct ArrowDeviceArray* array, std::shared_ptr type, const DeviceMemoryMapper& mapper) { diff --git a/cpp/src/arrow/c/bridge.h b/cpp/src/arrow/c/bridge.h index e98a42818f628..0ced3d38cd1e6 100644 --- a/cpp/src/arrow/c/bridge.h +++ b/cpp/src/arrow/c/bridge.h @@ -218,6 +218,10 @@ Status ExportDeviceRecordBatch(const RecordBatch& batch, using DeviceMemoryMapper = std::function>(ArrowDeviceType, int64_t)>; +ARROW_EXPORT +Result> DefaultDeviceMapper(ArrowDeviceType device_type, + int64_t device_id); + /// \brief EXPERIMENTAL: Import C++ device array from the C data interface. /// /// The ArrowArray struct has its contents moved (as per the C data interface @@ -226,12 +230,13 @@ using DeviceMemoryMapper = /// /// \param[in,out] array C data interface struct holding the array data /// \param[in] type type of the imported array -/// \param[in] mapper A function to map device + id to memory manager +/// \param[in] mapper A function to map device + id to memory manager. If not +/// specified, defaults to map "cpu" to the built-in default memory manager. /// \return Imported array object ARROW_EXPORT -Result> ImportDeviceArray(struct ArrowDeviceArray* array, - std::shared_ptr type, - const DeviceMemoryMapper& mapper); +Result> ImportDeviceArray( + struct ArrowDeviceArray* array, std::shared_ptr type, + const DeviceMemoryMapper& mapper = DefaultDeviceMapper); /// \brief EXPERIMENTAL: Import C++ device array and its type from the C data interface. 
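/// With the defaulted mapper argument introduced here, a CPU-only import
/// needs no explicit DeviceMemoryMapper. A hypothetical call site:
///
///   ArrowDeviceArray c_array;  // filled in by a producer
///   ArrowSchema c_schema;      // filled in by a producer
///   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> imported,
///                         ImportDeviceArray(&c_array, &c_schema));
///
/// Non-CPU data still requires a user-provided mapper, since
/// DefaultDeviceMapper returns NotImplemented for anything other than
/// ARROW_DEVICE_CPU.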
/// @@ -242,12 +247,13 @@ Result> ImportDeviceArray(struct ArrowDeviceArray* array, /// /// \param[in,out] array C data interface struct holding the array data /// \param[in,out] type C data interface struct holding the array type -/// \param[in] mapper A function to map device + id to memory manager +/// \param[in] mapper A function to map device + id to memory manager. If not +/// specified, defaults to map "cpu" to the built-in default memory manager. /// \return Imported array object ARROW_EXPORT -Result> ImportDeviceArray(struct ArrowDeviceArray* array, - struct ArrowSchema* type, - const DeviceMemoryMapper& mapper); +Result> ImportDeviceArray( + struct ArrowDeviceArray* array, struct ArrowSchema* type, + const DeviceMemoryMapper& mapper = DefaultDeviceMapper); /// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device from the C data /// interface. @@ -259,12 +265,13 @@ Result> ImportDeviceArray(struct ArrowDeviceArray* array, /// /// \param[in,out] array C data interface struct holding the record batch data /// \param[in] schema schema of the imported record batch -/// \param[in] mapper A function to map device + id to memory manager +/// \param[in] mapper A function to map device + id to memory manager. If not +/// specified, defaults to map "cpu" to the built-in default memory manager. /// \return Imported record batch object ARROW_EXPORT Result> ImportDeviceRecordBatch( struct ArrowDeviceArray* array, std::shared_ptr schema, - const DeviceMemoryMapper& mapper); + const DeviceMemoryMapper& mapper = DefaultDeviceMapper); /// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device and its schema /// from the C data interface. @@ -278,12 +285,13 @@ Result> ImportDeviceRecordBatch( /// /// \param[in,out] array C data interface struct holding the record batch data /// \param[in,out] schema C data interface struct holding the record batch schema -/// \param[in] mapper A function to map device + id to memory manager +/// \param[in] mapper A function to map device + id to memory manager. If not +/// specified, defaults to map "cpu" to the built-in default memory manager. 
/// \return Imported record batch object ARROW_EXPORT Result> ImportDeviceRecordBatch( struct ArrowDeviceArray* array, struct ArrowSchema* schema, - const DeviceMemoryMapper& mapper); + const DeviceMemoryMapper& mapper = DefaultDeviceMapper); /// @} diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc index 4a1ba6d0a329c..29bccb52658f8 100644 --- a/cpp/src/arrow/chunk_resolver.cc +++ b/cpp/src/arrow/chunk_resolver.cc @@ -56,14 +56,33 @@ inline std::vector MakeChunksOffsets(const std::vector& chunks) { } } // namespace -ChunkResolver::ChunkResolver(const ArrayVector& chunks) +ChunkResolver::ChunkResolver(const ArrayVector& chunks) noexcept : offsets_(MakeChunksOffsets(chunks)), cached_chunk_(0) {} -ChunkResolver::ChunkResolver(const std::vector& chunks) +ChunkResolver::ChunkResolver(const std::vector& chunks) noexcept : offsets_(MakeChunksOffsets(chunks)), cached_chunk_(0) {} -ChunkResolver::ChunkResolver(const RecordBatchVector& batches) +ChunkResolver::ChunkResolver(const RecordBatchVector& batches) noexcept : offsets_(MakeChunksOffsets(batches)), cached_chunk_(0) {} +ChunkResolver::ChunkResolver(ChunkResolver&& other) noexcept + : offsets_(std::move(other.offsets_)), + cached_chunk_(other.cached_chunk_.load(std::memory_order_relaxed)) {} + +ChunkResolver& ChunkResolver::operator=(ChunkResolver&& other) noexcept { + offsets_ = std::move(other.offsets_); + cached_chunk_.store(other.cached_chunk_.load(std::memory_order_relaxed)); + return *this; +} + +ChunkResolver::ChunkResolver(const ChunkResolver& other) noexcept + : offsets_(other.offsets_), cached_chunk_(0) {} + +ChunkResolver& ChunkResolver::operator=(const ChunkResolver& other) noexcept { + offsets_ = other.offsets_; + cached_chunk_.store(0, std::memory_order_relaxed); + return *this; +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h index d3ae315568d08..c5dad1a17b18e 100644 --- a/cpp/src/arrow/chunk_resolver.h +++ b/cpp/src/arrow/chunk_resolver.h @@ -32,12 +32,12 @@ struct ChunkLocation { /// /// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used /// to represent out-of-bounds locations. 
- int64_t chunk_index; + int64_t chunk_index = 0; /// \brief Index of the value in the chunk /// /// The value is undefined if chunk_index >= chunks.size() - int64_t index_in_chunk; + int64_t index_in_chunk = 0; }; /// \brief An utility that incrementally resolves logical indices into /// @@ -56,19 +56,15 @@ struct ARROW_EXPORT ChunkResolver { mutable std::atomic<int64_t> cached_chunk_; public: - explicit ChunkResolver(const ArrayVector& chunks); - explicit ChunkResolver(const std::vector<const Array*>& chunks); - explicit ChunkResolver(const RecordBatchVector& batches); - - ChunkResolver(ChunkResolver&& other) noexcept - : offsets_(std::move(other.offsets_)), - cached_chunk_(other.cached_chunk_.load(std::memory_order_relaxed)) {} - - ChunkResolver& operator=(ChunkResolver&& other) { - offsets_ = std::move(other.offsets_); - cached_chunk_.store(other.cached_chunk_.load(std::memory_order_relaxed)); - return *this; - } + explicit ChunkResolver(const ArrayVector& chunks) noexcept; + explicit ChunkResolver(const std::vector<const Array*>& chunks) noexcept; + explicit ChunkResolver(const RecordBatchVector& batches) noexcept; + + ChunkResolver(ChunkResolver&& other) noexcept; + ChunkResolver& operator=(ChunkResolver&& other) noexcept; + + ChunkResolver(const ChunkResolver& other) noexcept; + ChunkResolver& operator=(const ChunkResolver& other) noexcept; /// \brief Resolve a logical index to a ChunkLocation. /// @@ -96,16 +92,16 @@ struct ARROW_EXPORT ChunkResolver { /// \pre index >= 0 /// \post location.chunk_index in [0, chunks.size()] /// \param index The logical index to resolve - /// \param cached_chunk_index 0 or the chunk_index of the last ChunkLocation - /// returned by this ChunkResolver. + /// \param hint ChunkLocation{} or the last ChunkLocation returned by + /// this ChunkResolver. /// \return ChunkLocation with a valid chunk_index if index is within /// bounds, or with chunk_index == chunks.size() if logical index is /// `>= chunked_array.length()`. inline ChunkLocation ResolveWithChunkIndexHint(int64_t index, - int64_t cached_chunk_index) const { - assert(cached_chunk_index < static_cast<int64_t>(offsets_.size())); + ChunkLocation hint) const { + assert(hint.chunk_index < static_cast<int64_t>(offsets_.size())); const auto chunk_index = - ResolveChunkIndex(index, cached_chunk_index); + ResolveChunkIndex(index, hint.chunk_index); return {chunk_index, index - offsets_[chunk_index]}; } diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index e14d78ff6e5ca..badcf4f2f26ac 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License.
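A note on the ChunkResolver special members declared above: they are hand-written because std::atomic<int64_t> is neither copyable nor movable, so the compiler cannot generate them. The definitions in chunk_resolver.cc move or copy offsets_ normally but treat the cache differently: a move carries the cached chunk index over with relaxed loads and stores, while a copy resets it to 0. Relaxed ordering is enough because the cache is a pure performance hint and a stale value still resolves correctly. A reduced, self-contained sketch of the same pattern; CachedResolver is a hypothetical stand-in, not Arrow code:

    #include <atomic>
    #include <cstdint>
    #include <utility>
    #include <vector>

    class CachedResolver {
      std::vector<int64_t> offsets_;
      // Pure performance hint: readers tolerate stale values.
      mutable std::atomic<int64_t> cached_{0};

     public:
      explicit CachedResolver(std::vector<int64_t> offsets)
          : offsets_(std::move(offsets)) {}

      // Move keeps the hint: the source's cache is still meaningful here.
      CachedResolver(CachedResolver&& other) noexcept
          : offsets_(std::move(other.offsets_)),
            cached_(other.cached_.load(std::memory_order_relaxed)) {}

      // Copy drops the hint back to 0; no correctness decision ever
      // depends on the cached value, so this is always safe.
      CachedResolver(const CachedResolver& other) : offsets_(other.offsets_) {}

      CachedResolver& operator=(const CachedResolver& other) {
        offsets_ = other.offsets_;
        cached_.store(0, std::memory_order_relaxed);
        return *this;
      }
    };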
-add_custom_target(arrow_compute) +add_custom_target(arrow-compute-tests) arrow_install_all_headers("arrow/compute") @@ -29,7 +29,7 @@ endif() # set(ARROW_COMPUTE_TEST_PREFIX "arrow-compute") -set(ARROW_COMPUTE_TEST_LABELS "arrow_compute") +set(ARROW_COMPUTE_TEST_LABELS "arrow-compute-tests") set(ARROW_COMPUTE_TEST_ARGS PREFIX ${ARROW_COMPUTE_TEST_PREFIX} LABELS ${ARROW_COMPUTE_TEST_LABELS}) diff --git a/cpp/src/arrow/compute/expression.cc b/cpp/src/arrow/compute/expression.cc index 8c59ad1df86f2..38312225dd5c5 100644 --- a/cpp/src/arrow/compute/expression.cc +++ b/cpp/src/arrow/compute/expression.cc @@ -536,60 +536,67 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ std::vector types = GetTypes(call.arguments); ARROW_ASSIGN_OR_RAISE(call.function, GetFunction(call, exec_context)); + auto FinishBind = [&] { + compute::KernelContext kernel_context(exec_context, call.kernel); + if (call.kernel->init) { + const FunctionOptions* options = + call.options ? call.options.get() : call.function->default_options(); + ARROW_ASSIGN_OR_RAISE( + call.kernel_state, + call.kernel->init(&kernel_context, {call.kernel, types, options})); + + kernel_context.SetState(call.kernel_state.get()); + } + + ARROW_ASSIGN_OR_RAISE( + call.type, call.kernel->signature->out_type().Resolve(&kernel_context, types)); + return Status::OK(); + }; + // First try and bind exactly Result maybe_exact_match = call.function->DispatchExact(types); if (maybe_exact_match.ok()) { call.kernel = *maybe_exact_match; - } else { - if (!insert_implicit_casts) { - return maybe_exact_match.status(); + if (FinishBind().ok()) { + return Expression(std::move(call)); } - // If exact binding fails, and we are allowed to cast, then prefer casting literals - // first. Since DispatchBest generally prefers up-casting the best way to do this is - // first down-cast the literals as much as possible - types = GetTypesWithSmallestLiteralRepresentation(call.arguments); - ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchBest(&types)); - - for (size_t i = 0; i < types.size(); ++i) { - if (types[i] == call.arguments[i].type()) continue; - - if (const Datum* lit = call.arguments[i].literal()) { - ARROW_ASSIGN_OR_RAISE(Datum new_lit, - compute::Cast(*lit, types[i].GetSharedPtr())); - call.arguments[i] = literal(std::move(new_lit)); - continue; - } + } - // construct an implicit cast Expression with which to replace this argument - Expression::Call implicit_cast; - implicit_cast.function_name = "cast"; - implicit_cast.arguments = {std::move(call.arguments[i])}; + if (!insert_implicit_casts) { + return maybe_exact_match.status(); + } - // TODO(wesm): Use TypeHolder in options - implicit_cast.options = std::make_shared( - compute::CastOptions::Safe(types[i].GetSharedPtr())); + // If exact binding fails, and we are allowed to cast, then prefer casting literals + // first. 
Since DispatchBest generally prefers up-casting the best way to do this is + // first down-cast the literals as much as possible + types = GetTypesWithSmallestLiteralRepresentation(call.arguments); + ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchBest(&types)); - ARROW_ASSIGN_OR_RAISE( - call.arguments[i], - BindNonRecursive(std::move(implicit_cast), - /*insert_implicit_casts=*/false, exec_context)); + for (size_t i = 0; i < types.size(); ++i) { + if (types[i] == call.arguments[i].type()) continue; + + if (const Datum* lit = call.arguments[i].literal()) { + ARROW_ASSIGN_OR_RAISE(Datum new_lit, compute::Cast(*lit, types[i].GetSharedPtr())); + call.arguments[i] = literal(std::move(new_lit)); + continue; } - } - compute::KernelContext kernel_context(exec_context, call.kernel); - if (call.kernel->init) { - const FunctionOptions* options = - call.options ? call.options.get() : call.function->default_options(); - ARROW_ASSIGN_OR_RAISE( - call.kernel_state, - call.kernel->init(&kernel_context, {call.kernel, types, options})); + // construct an implicit cast Expression with which to replace this argument + Expression::Call implicit_cast; + implicit_cast.function_name = "cast"; + implicit_cast.arguments = {std::move(call.arguments[i])}; - kernel_context.SetState(call.kernel_state.get()); - } + // TODO(wesm): Use TypeHolder in options + implicit_cast.options = std::make_shared( + compute::CastOptions::Safe(types[i].GetSharedPtr())); - ARROW_ASSIGN_OR_RAISE( - call.type, call.kernel->signature->out_type().Resolve(&kernel_context, types)); + ARROW_ASSIGN_OR_RAISE( + call.arguments[i], + BindNonRecursive(std::move(implicit_cast), + /*insert_implicit_casts=*/false, exec_context)); + } + RETURN_NOT_OK(FinishBind()); return Expression(std::move(call)); } diff --git a/cpp/src/arrow/compute/expression_test.cc b/cpp/src/arrow/compute/expression_test.cc index d33c348cd77da..38f8183dabcba 100644 --- a/cpp/src/arrow/compute/expression_test.cc +++ b/cpp/src/arrow/compute/expression_test.cc @@ -604,6 +604,20 @@ TEST(Expression, BindCall) { add(cast(field_ref("i32"), float32()), literal(3.5F))); } +TEST(Expression, BindWithDecimalArithmeticOps) { + for (std::string arith_op : {"add", "subtract", "multiply", "divide"}) { + auto expr = call(arith_op, {field_ref("d1"), field_ref("d2")}); + EXPECT_FALSE(expr.IsBound()); + + static const std::vector> scales = {{3, 9}, {6, 6}, {9, 3}}; + for (auto s : scales) { + auto schema = arrow::schema( + {field("d1", decimal256(30, s.first)), field("d2", decimal256(20, s.second))}); + ExpectBindsTo(expr, no_change, &expr, *schema); + } + } +} + TEST(Expression, BindWithImplicitCasts) { for (auto cmp : {equal, not_equal, less, less_equal, greater, greater_equal}) { // cast arguments to common numeric type diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 4350cd57ff026..afb30996eac15 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -23,7 +23,7 @@ if(ARROW_TESTING) add_library(arrow_compute_kernels_testing OBJECT test_util.cc) # Even though this is still just an object library we still need to "link" our # dependencies so that include paths are configured correctly - target_link_libraries(arrow_compute_kernels_testing PRIVATE ${ARROW_GTEST_GMOCK}) + target_link_libraries(arrow_compute_kernels_testing PUBLIC ${ARROW_GTEST_GMOCK}) endif() add_arrow_test(scalar_cast_test @@ -36,13 +36,17 @@ add_arrow_test(scalar_cast_test # 
---------------------------------------------------------------------- # Scalar kernels +set(ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS arrow_compute_kernels_testing) +if(ARROW_WITH_UTF8PROC) + list(APPEND ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS utf8proc::utf8proc) +endif() add_arrow_compute_test(scalar_type_test SOURCES scalar_boolean_test.cc scalar_nested_test.cc scalar_string_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + ${ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS}) add_arrow_compute_test(scalar_if_else_test SOURCES @@ -126,7 +130,8 @@ add_arrow_compute_test(aggregate_test SOURCES aggregate_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + Boost::headers) # ---------------------------------------------------------------------- # Utilities diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.h b/cpp/src/arrow/compute/kernels/chunked_internal.h index 69f439fccf026..2b72e0ab3109e 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.h +++ b/cpp/src/arrow/compute/kernels/chunked_internal.h @@ -31,26 +31,7 @@ namespace compute { namespace internal { // The target chunk in a chunked array. -template struct ResolvedChunk { - using ViewType = GetViewType; - using LogicalValueType = typename ViewType::T; - - // The target array in chunked array. - const ArrayType* array; - // The index in the target array. - const int64_t index; - - ResolvedChunk(const ArrayType* array, int64_t index) : array(array), index(index) {} - - bool IsNull() const { return array->IsNull(index); } - - LogicalValueType Value() const { return ViewType::LogicalValue(array->GetView(index)); } -}; - -// ResolvedChunk specialization for untyped arrays when all is needed is null lookup -template <> -struct ResolvedChunk { // The target array in chunked array. const Array* array; // The index in the target array. 
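The second half of this header change (next hunk) moves the template parameter off ResolvedChunk and onto its accessor: the resolver hands back one untyped ResolvedChunk, and callers choose the Arrow type only at the point where a value is read. A hypothetical call site, assuming int64 chunks:

    // `chunked` is an arrow::ChunkedArray holding int64 values.
    using arrow::compute::internal::ChunkedArrayResolver;
    using arrow::compute::internal::GetArrayPointers;

    ChunkedArrayResolver resolver(GetArrayPointers(chunked.chunks()));
    auto loc = resolver.Resolve(/*index=*/42);
    if (!loc.IsNull()) {
      // Typed access happens per call, via the new member template.
      int64_t value = loc.Value<arrow::Int64Type>();
    }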
@@ -58,24 +39,36 @@ struct ResolvedChunk { ResolvedChunk(const Array* array, int64_t index) : array(array), index(index) {} + public: bool IsNull() const { return array->IsNull(index); } + + template > + typename ViewType::T Value() const { + using LogicalArrayType = typename TypeTraits::ArrayType; + auto* typed_array = checked_cast(array); + return ViewType::LogicalValue(typed_array->GetView(index)); + } }; -struct ChunkedArrayResolver : protected ::arrow::internal::ChunkResolver { - ChunkedArrayResolver(const ChunkedArrayResolver& other) - : ::arrow::internal::ChunkResolver(other.chunks_), chunks_(other.chunks_) {} +class ChunkedArrayResolver { + private: + ::arrow::internal::ChunkResolver resolver_; + std::vector chunks_; + public: explicit ChunkedArrayResolver(const std::vector& chunks) - : ::arrow::internal::ChunkResolver(chunks), chunks_(chunks) {} + : resolver_(chunks), chunks_(chunks) {} - template - ResolvedChunk Resolve(int64_t index) const { - const auto loc = ::arrow::internal::ChunkResolver::Resolve(index); - return {checked_cast(chunks_[loc.chunk_index]), loc.index_in_chunk}; - } + ChunkedArrayResolver(ChunkedArrayResolver&& other) = default; + ChunkedArrayResolver& operator=(ChunkedArrayResolver&& other) = default; + + ChunkedArrayResolver(const ChunkedArrayResolver& other) = default; + ChunkedArrayResolver& operator=(const ChunkedArrayResolver& other) = default; - protected: - const std::vector chunks_; + ResolvedChunk Resolve(int64_t index) const { + const auto loc = resolver_.Resolve(index); + return {chunks_[loc.chunk_index], loc.index_in_chunk}; + } }; inline std::vector GetArrayPointers(const ArrayVector& arrays) { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index 44f5fea79078a..efd25a8a20c80 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -499,8 +499,9 @@ Result ResolveDecimalBinaryOperationOutput( DCHECK_EQ(left_type.id(), right_type.id()); int32_t precision, scale; - std::tie(precision, scale) = getter(left_type.precision(), left_type.scale(), - right_type.precision(), right_type.scale()); + ARROW_ASSIGN_OR_RAISE(std::tie(precision, scale), + ToResult(getter(left_type.precision(), left_type.scale(), + right_type.precision(), right_type.scale()))); ARROW_ASSIGN_OR_RAISE(auto type, DecimalType::Make(left_type.id(), precision, scale)); return std::move(type); } @@ -508,7 +509,13 @@ Result ResolveDecimalBinaryOperationOutput( Result ResolveDecimalAdditionOrSubtractionOutput( KernelContext*, const std::vector& types) { return ResolveDecimalBinaryOperationOutput( - types, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) { + types, + [](int32_t p1, int32_t s1, int32_t p2, + int32_t s2) -> Result> { + if (s1 != s2) { + return Status::Invalid("Addition or subtraction of two decimal ", + "types scale1 != scale2. 
(", s1, s2, ")."); + } DCHECK_EQ(s1, s2); const int32_t scale = s1; const int32_t precision = std::max(p1 - s1, p2 - s2) + scale + 1; @@ -519,7 +526,9 @@ Result ResolveDecimalAdditionOrSubtractionOutput( Result ResolveDecimalMultiplicationOutput( KernelContext*, const std::vector& types) { return ResolveDecimalBinaryOperationOutput( - types, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) { + types, + [](int32_t p1, int32_t s1, int32_t p2, + int32_t s2) -> Result> { const int32_t scale = s1 + s2; const int32_t precision = p1 + p2 + 1; return std::make_pair(precision, scale); @@ -529,7 +538,13 @@ Result ResolveDecimalMultiplicationOutput( Result ResolveDecimalDivisionOutput(KernelContext*, const std::vector& types) { return ResolveDecimalBinaryOperationOutput( - types, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) { + types, + [](int32_t p1, int32_t s1, int32_t p2, + int32_t s2) -> Result> { + if (s1 < s2) { + return Status::Invalid("Division of two decimal types scale1 < scale2. ", "(", + s1, s2, ")."); + } DCHECK_GE(s1, s2); const int32_t scale = s1 - s2; const int32_t precision = p1; diff --git a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc index 8fdc6172aa6d3..038e623b43c53 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc @@ -21,17 +21,18 @@ #include #include -#ifdef ARROW_WITH_RE2 -#include -#endif - #include "arrow/array/builder_nested.h" #include "arrow/compute/kernels/scalar_string_internal.h" #include "arrow/result.h" +#include "arrow/util/config.h" #include "arrow/util/macros.h" #include "arrow/util/string.h" #include "arrow/util/value_parsing.h" +#ifdef ARROW_WITH_RE2 +#include +#endif + namespace arrow { using internal::EndsWith; diff --git a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc index 0977ea7806cb4..909c89dbe4795 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc @@ -25,6 +25,7 @@ #include "arrow/testing/random.h" #include "arrow/util/benchmark_util.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/config.h" namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index d7e35d07334ea..26289a7f787e1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -23,10 +23,6 @@ #include #include -#ifdef ARROW_WITH_UTF8PROC -#include -#endif - #include "arrow/compute/api_scalar.h" #include "arrow/compute/exec.h" #include "arrow/compute/kernels/codegen_internal.h" @@ -34,8 +30,13 @@ #include "arrow/testing/gtest_util.h" #include "arrow/type.h" #include "arrow/type_fwd.h" +#include "arrow/util/config.h" #include "arrow/util/value_parsing.h" +#ifdef ARROW_WITH_UTF8PROC +#include +#endif + namespace arrow::compute { // interesting utf8 characters for testing (lower case / upper case): diff --git a/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc b/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc index cf8a697fea411..d720d4eee804f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc @@ -19,13 +19,14 @@ #include #include +#include "arrow/compute/kernels/scalar_string_internal.h" +#include "arrow/util/config.h" +#include "arrow/util/utf8_internal.h" + #ifdef 
ARROW_WITH_UTF8PROC #include #endif -#include "arrow/compute/kernels/scalar_string_internal.h" -#include "arrow/util/utf8_internal.h" - namespace arrow { namespace compute { namespace internal { diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc index 0cea7246e516c..c4e52701411fd 100644 --- a/cpp/src/arrow/compute/kernels/vector_rank.cc +++ b/cpp/src/arrow/compute/kernels/vector_rank.cc @@ -227,8 +227,6 @@ class Ranker : public RankerMixin Status RankInternal() { - using ArrayType = typename TypeTraits::ArrayType; - if (physical_chunks_.empty()) { return Status::OK(); } @@ -240,7 +238,7 @@ class Ranker : public RankerMixin(index).Value(); + return resolver.Resolve(index).Value(); }; ARROW_ASSIGN_OR_RAISE(*output_, CreateRankings(ctx_, sorted, null_placement_, tiebreaker_, value_selector)); diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index 97996e6d52cc0..4ef7d80b994ff 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -406,10 +406,7 @@ class TableSelector : public TypeVisitor { // Find the target chunk and index in the target chunk from an // index in chunked array. - template - ResolvedChunk GetChunk(int64_t index) const { - return resolver.Resolve(index); - } + ResolvedChunk GetChunk(int64_t index) const { return resolver.Resolve(index); } const SortOrder order; const std::shared_ptr type; @@ -495,7 +492,6 @@ class TableSelector : public TypeVisitor { template Status SelectKthInternal() { - using ArrayType = typename TypeTraits::ArrayType; auto& comparator = comparator_; const auto& first_sort_key = sort_keys_[0]; @@ -509,10 +505,10 @@ class TableSelector : public TypeVisitor { std::function cmp; SelectKComparator select_k_comparator; cmp = [&](const uint64_t& left, const uint64_t& right) -> bool { - auto chunk_left = first_sort_key.template GetChunk(left); - auto chunk_right = first_sort_key.template GetChunk(right); - auto value_left = chunk_left.Value(); - auto value_right = chunk_right.Value(); + auto chunk_left = first_sort_key.GetChunk(left); + auto chunk_right = first_sort_key.GetChunk(right); + auto value_left = chunk_left.Value(); + auto value_right = chunk_right.Value(); if (value_left == value_right) { return comparator.Compare(left, right, 1); } diff --git a/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc index e65d5dbcab1c9..c2a27dfe43488 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc @@ -115,6 +115,24 @@ struct TakeBenchmark { indices_have_nulls(indices_have_nulls), monotonic_indices(monotonic_indices) {} + static constexpr int kStringMinLength = 0; + static constexpr int kStringMaxLength = 32; + static constexpr int kByteWidthRange = 2; + + template + std::shared_ptr GenChunkedArray(int64_t num_chunks, + GenChunk&& gen_chunk) { + const int64_t chunk_length = + std::llround(args.size / static_cast(num_chunks)); + ArrayVector chunks; + for (int64_t i = 0; i < num_chunks; ++i) { + const int64_t fitting_chunk_length = + std::min(chunk_length, args.size - i * chunk_length); + chunks.push_back(gen_chunk(fitting_chunk_length)); + } + return std::make_shared(std::move(chunks)); + } + void Int64() { auto values = rand.Int64(args.size, -100, 100, args.null_proportion); Bench(values); @@ -129,19 +147,43 @@ struct TakeBenchmark { } 
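// Note on GenChunkedArray above: chunk_length is args.size / num_chunks
// rounded to the nearest integer, and each chunk is clamped by the number
// of elements still unassigned, so the generated total never exceeds
// args.size (e.g. size=1000, num_chunks=3 yields chunks of 333/333/333).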
void FixedSizeBinary() { - const int32_t byte_width = static_cast(state.range(2)); + const auto byte_width = static_cast(state.range(kByteWidthRange)); auto values = rand.FixedSizeBinary(args.size, byte_width, args.null_proportion); Bench(values); state.counters["byte_width"] = byte_width; } void String() { - int32_t string_min_length = 0, string_max_length = 32; - auto values = std::static_pointer_cast(rand.String( - args.size, string_min_length, string_max_length, args.null_proportion)); + auto values = std::static_pointer_cast( + rand.String(args.size, kStringMinLength, kStringMaxLength, args.null_proportion)); Bench(values); } + void ChunkedInt64(int64_t num_chunks, bool chunk_indices_too) { + auto chunked_array = GenChunkedArray(num_chunks, [this](int64_t chunk_length) { + return rand.Int64(chunk_length, -100, 100, args.null_proportion); + }); + BenchChunked(chunked_array, chunk_indices_too); + } + + void ChunkedFSB(int64_t num_chunks, bool chunk_indices_too) { + const auto byte_width = static_cast(state.range(kByteWidthRange)); + auto chunked_array = + GenChunkedArray(num_chunks, [this, byte_width](int64_t chunk_length) { + return rand.FixedSizeBinary(chunk_length, byte_width, args.null_proportion); + }); + BenchChunked(chunked_array, chunk_indices_too); + state.counters["byte_width"] = byte_width; + } + + void ChunkedString(int64_t num_chunks, bool chunk_indices_too) { + auto chunked_array = GenChunkedArray(num_chunks, [this](int64_t chunk_length) { + return std::static_pointer_cast(rand.String( + chunk_length, kStringMinLength, kStringMaxLength, args.null_proportion)); + }); + BenchChunked(chunked_array, chunk_indices_too); + } + void Bench(const std::shared_ptr& values) { double indices_null_proportion = indices_have_nulls ? args.null_proportion : 0; auto indices = @@ -158,6 +200,40 @@ struct TakeBenchmark { } state.SetItemsProcessed(state.iterations() * values->length()); } + + void BenchChunked(const std::shared_ptr& values, bool chunk_indices_too) { + double indices_null_proportion = indices_have_nulls ? 
args.null_proportion : 0; + auto indices = + rand.Int32(values->length(), 0, static_cast(values->length() - 1), + indices_null_proportion); + + if (monotonic_indices) { + auto arg_sorter = *SortIndices(*indices); + indices = *Take(*indices, *arg_sorter); + } + std::shared_ptr chunked_indices; + if (chunk_indices_too) { + std::vector> indices_chunks; + int64_t offset = 0; + for (int i = 0; i < values->num_chunks(); ++i) { + auto chunk = indices->Slice(offset, values->chunk(i)->length()); + indices_chunks.push_back(std::move(chunk)); + offset += values->chunk(i)->length(); + } + chunked_indices = std::make_shared(std::move(indices_chunks)); + } + + if (chunk_indices_too) { + for (auto _ : state) { + ABORT_NOT_OK(Take(values, chunked_indices).status()); + } + } else { + for (auto _ : state) { + ABORT_NOT_OK(Take(values, indices).status()); + } + } + state.SetItemsProcessed(state.iterations() * values->length()); + } }; struct FilterBenchmark { @@ -298,11 +374,11 @@ static void FilterRecordBatchWithNulls(benchmark::State& state) { } static void TakeInt64RandomIndicesNoNulls(benchmark::State& state) { - TakeBenchmark(state, false).Int64(); + TakeBenchmark(state, /*indices_with_nulls=*/false).Int64(); } static void TakeInt64RandomIndicesWithNulls(benchmark::State& state) { - TakeBenchmark(state, true).Int64(); + TakeBenchmark(state, /*indices_with_nulls=*/true).Int64(); } static void TakeInt64MonotonicIndices(benchmark::State& state) { @@ -310,11 +386,11 @@ static void TakeInt64MonotonicIndices(benchmark::State& state) { } static void TakeFixedSizeBinaryRandomIndicesNoNulls(benchmark::State& state) { - TakeBenchmark(state, false).FixedSizeBinary(); + TakeBenchmark(state, /*indices_with_nulls=*/false).FixedSizeBinary(); } static void TakeFixedSizeBinaryRandomIndicesWithNulls(benchmark::State& state) { - TakeBenchmark(state, true).FixedSizeBinary(); + TakeBenchmark(state, /*indices_with_nulls=*/true).FixedSizeBinary(); } static void TakeFixedSizeBinaryMonotonicIndices(benchmark::State& state) { @@ -323,11 +399,11 @@ static void TakeFixedSizeBinaryMonotonicIndices(benchmark::State& state) { } static void TakeFSLInt64RandomIndicesNoNulls(benchmark::State& state) { - TakeBenchmark(state, false).FSLInt64(); + TakeBenchmark(state, /*indices_with_nulls=*/false).FSLInt64(); } static void TakeFSLInt64RandomIndicesWithNulls(benchmark::State& state) { - TakeBenchmark(state, true).FSLInt64(); + TakeBenchmark(state, /*indices_with_nulls=*/true).FSLInt64(); } static void TakeFSLInt64MonotonicIndices(benchmark::State& state) { @@ -335,17 +411,79 @@ static void TakeFSLInt64MonotonicIndices(benchmark::State& state) { } static void TakeStringRandomIndicesNoNulls(benchmark::State& state) { - TakeBenchmark(state, false).String(); + TakeBenchmark(state, /*indices_with_nulls=*/false).String(); } static void TakeStringRandomIndicesWithNulls(benchmark::State& state) { - TakeBenchmark(state, true).String(); + TakeBenchmark(state, /*indices_with_nulls=*/true).String(); } static void TakeStringMonotonicIndices(benchmark::State& state) { TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true).FSLInt64(); } +static void TakeChunkedChunkedInt64RandomIndicesNoNulls(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false) + .ChunkedInt64(/*num_chunks=*/100, /*chunk_indices_too=*/true); +} + +static void TakeChunkedChunkedInt64RandomIndicesWithNulls(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/true) + .ChunkedInt64(/*num_chunks=*/100, /*chunk_indices_too=*/true); 
+} + +static void TakeChunkedChunkedInt64MonotonicIndices(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true) + .ChunkedInt64( + /*num_chunks=*/100, /*chunk_indices_too=*/true); +} + +static void TakeChunkedChunkedFSBRandomIndicesNoNulls(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false) + .ChunkedFSB(/*num_chunks=*/100, /*chunk_indices_too=*/true); +} + +static void TakeChunkedChunkedFSBRandomIndicesWithNulls(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/true) + .ChunkedFSB(/*num_chunks=*/100, /*chunk_indices_too=*/true); +} + +static void TakeChunkedChunkedFSBMonotonicIndices(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true) + .ChunkedFSB(/*num_chunks=*/100, /*chunk_indices_too=*/true); +} + +static void TakeChunkedChunkedStringRandomIndicesNoNulls(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false) + .ChunkedString(/*num_chunks=*/100, /*chunk_indices_too=*/true); +} + +static void TakeChunkedChunkedStringRandomIndicesWithNulls(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/true) + .ChunkedString(/*num_chunks=*/100, /*chunk_indices_too=*/true); +} + +static void TakeChunkedChunkedStringMonotonicIndices(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true) + .ChunkedString(/*num_chunks=*/100, /*chunk_indices_too=*/true); +} + +static void TakeChunkedFlatInt64RandomIndicesNoNulls(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false) + .ChunkedInt64(/*num_chunks=*/100, /*chunk_indices_too=*/false); +} + +static void TakeChunkedFlatInt64RandomIndicesWithNulls(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/true) + .ChunkedInt64(/*num_chunks=*/100, /*chunk_indices_too=*/false); +} + +static void TakeChunkedFlatInt64MonotonicIndices(benchmark::State& state) { + TakeBenchmark(state, /*indices_with_nulls=*/false, /*monotonic=*/true) + .ChunkedInt64( + /*num_chunks=*/100, /*chunk_indices_too=*/false); +} + void FilterSetArgs(benchmark::internal::Benchmark* bench) { for (int64_t size : g_data_sizes) { for (int i = 0; i < static_cast(g_filter_params.size()); ++i) { @@ -405,6 +543,7 @@ void TakeFSBSetArgs(benchmark::internal::Benchmark* bench) { } } +// Flat values x Flat indices BENCHMARK(TakeInt64RandomIndicesNoNulls)->Apply(TakeSetArgs); BENCHMARK(TakeInt64RandomIndicesWithNulls)->Apply(TakeSetArgs); BENCHMARK(TakeInt64MonotonicIndices)->Apply(TakeSetArgs); @@ -418,5 +557,21 @@ BENCHMARK(TakeStringRandomIndicesNoNulls)->Apply(TakeSetArgs); BENCHMARK(TakeStringRandomIndicesWithNulls)->Apply(TakeSetArgs); BENCHMARK(TakeStringMonotonicIndices)->Apply(TakeSetArgs); +// Chunked values x Chunked indices +BENCHMARK(TakeChunkedChunkedInt64RandomIndicesNoNulls)->Apply(TakeSetArgs); +BENCHMARK(TakeChunkedChunkedInt64RandomIndicesWithNulls)->Apply(TakeSetArgs); +BENCHMARK(TakeChunkedChunkedInt64MonotonicIndices)->Apply(TakeSetArgs); +BENCHMARK(TakeChunkedChunkedFSBRandomIndicesNoNulls)->Apply(TakeFSBSetArgs); +BENCHMARK(TakeChunkedChunkedFSBRandomIndicesWithNulls)->Apply(TakeFSBSetArgs); +BENCHMARK(TakeChunkedChunkedFSBMonotonicIndices)->Apply(TakeFSBSetArgs); +BENCHMARK(TakeChunkedChunkedStringRandomIndicesNoNulls)->Apply(TakeSetArgs); +BENCHMARK(TakeChunkedChunkedStringRandomIndicesWithNulls)->Apply(TakeSetArgs); +BENCHMARK(TakeChunkedChunkedStringMonotonicIndices)->Apply(TakeSetArgs); + +// Chunked values 
x Flat indices +BENCHMARK(TakeChunkedFlatInt64RandomIndicesNoNulls)->Apply(TakeSetArgs); +BENCHMARK(TakeChunkedFlatInt64RandomIndicesWithNulls)->Apply(TakeSetArgs); +BENCHMARK(TakeChunkedFlatInt64MonotonicIndices)->Apply(TakeSetArgs); + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc index 89b3f7d0d3c58..5cd3710828485 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc @@ -681,112 +681,122 @@ Status ExtensionTake(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) // R -> RecordBatch // T -> Table -Result> TakeAA(const std::shared_ptr& values, - const std::shared_ptr& indices, - const TakeOptions& options, ExecContext* ctx) { +Result> TakeAAA(const std::shared_ptr& values, + const std::shared_ptr& indices, + const TakeOptions& options, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("array_take", {values, indices}, &options, ctx)); return result.array(); } -Result> TakeCA(const ChunkedArray& values, - const Array& indices, - const TakeOptions& options, - ExecContext* ctx) { - auto num_chunks = values.num_chunks(); - std::shared_ptr current_chunk; - - // Case 1: `values` has a single chunk, so just use it - if (num_chunks == 1) { - current_chunk = values.chunk(0); +Result> TakeCAC(const ChunkedArray& values, + const Array& indices, + const TakeOptions& options, + ExecContext* ctx) { + std::shared_ptr values_array; + if (values.num_chunks() == 1) { + // Case 1: `values` has a single chunk, so just use it + values_array = values.chunk(0); } else { // TODO Case 2: See if all `indices` fall in the same chunk and call Array Take on it // See // https://github.com/apache/arrow/blob/6f2c9041137001f7a9212f244b51bc004efc29af/r/src/compute.cpp#L123-L151 // TODO Case 3: If indices are sorted, can slice them and call Array Take + // (these are relevant to TakeCCC as well) // Case 4: Else, concatenate chunks and call Array Take if (values.chunks().empty()) { - ARROW_ASSIGN_OR_RAISE(current_chunk, MakeArrayOfNull(values.type(), /*length=*/0, - ctx->memory_pool())); + ARROW_ASSIGN_OR_RAISE( + values_array, MakeArrayOfNull(values.type(), /*length=*/0, ctx->memory_pool())); } else { - ARROW_ASSIGN_OR_RAISE(current_chunk, + ARROW_ASSIGN_OR_RAISE(values_array, Concatenate(values.chunks(), ctx->memory_pool())); } } // Call Array Take on our single chunk ARROW_ASSIGN_OR_RAISE(std::shared_ptr new_chunk, - TakeAA(current_chunk->data(), indices.data(), options, ctx)); + TakeAAA(values_array->data(), indices.data(), options, ctx)); std::vector> chunks = {MakeArray(new_chunk)}; return std::make_shared(std::move(chunks)); } -Result> TakeCC(const ChunkedArray& values, - const ChunkedArray& indices, - const TakeOptions& options, - ExecContext* ctx) { - auto num_chunks = indices.num_chunks(); - std::vector> new_chunks(num_chunks); - for (int i = 0; i < num_chunks; i++) { - // Take with that indices chunk - // Note that as currently implemented, this is inefficient because `values` - // will get concatenated on every iteration of this loop - ARROW_ASSIGN_OR_RAISE(std::shared_ptr current_chunk, - TakeCA(values, *indices.chunk(i), options, ctx)); - // Concatenate the result to make a single array for this chunk - ARROW_ASSIGN_OR_RAISE(new_chunks[i], - Concatenate(current_chunk->chunks(), ctx->memory_pool())); +Result> TakeCCC(const ChunkedArray& values, + const 
ChunkedArray& indices, + const TakeOptions& options, + ExecContext* ctx) { + // XXX: for every chunk in indices, values are gathered from all chunks in values to + // form a new chunk in the result. Performing this concatenation is not ideal, but + // greatly simplifies the implementation before something more efficient is + // implemented. + std::shared_ptr values_array; + if (values.num_chunks() == 1) { + values_array = values.chunk(0); + } else { + if (values.chunks().empty()) { + ARROW_ASSIGN_OR_RAISE( + values_array, MakeArrayOfNull(values.type(), /*length=*/0, ctx->memory_pool())); + } else { + ARROW_ASSIGN_OR_RAISE(values_array, + Concatenate(values.chunks(), ctx->memory_pool())); + } + } + std::vector> new_chunks; + new_chunks.resize(indices.num_chunks()); + for (int i = 0; i < indices.num_chunks(); i++) { + ARROW_ASSIGN_OR_RAISE(auto chunk, TakeAAA(values_array->data(), + indices.chunk(i)->data(), options, ctx)); + new_chunks[i] = MakeArray(chunk); } return std::make_shared(std::move(new_chunks), values.type()); } -Result> TakeAC(const Array& values, - const ChunkedArray& indices, - const TakeOptions& options, - ExecContext* ctx) { +Result> TakeACC(const Array& values, + const ChunkedArray& indices, + const TakeOptions& options, + ExecContext* ctx) { auto num_chunks = indices.num_chunks(); std::vector> new_chunks(num_chunks); for (int i = 0; i < num_chunks; i++) { // Take with that indices chunk ARROW_ASSIGN_OR_RAISE(std::shared_ptr chunk, - TakeAA(values.data(), indices.chunk(i)->data(), options, ctx)); + TakeAAA(values.data(), indices.chunk(i)->data(), options, ctx)); new_chunks[i] = MakeArray(chunk); } return std::make_shared(std::move(new_chunks), values.type()); } -Result> TakeRA(const RecordBatch& batch, - const Array& indices, - const TakeOptions& options, - ExecContext* ctx) { +Result> TakeRAR(const RecordBatch& batch, + const Array& indices, + const TakeOptions& options, + ExecContext* ctx) { auto ncols = batch.num_columns(); auto nrows = indices.length(); std::vector> columns(ncols); for (int j = 0; j < ncols; j++) { ARROW_ASSIGN_OR_RAISE(std::shared_ptr col_data, - TakeAA(batch.column(j)->data(), indices.data(), options, ctx)); + TakeAAA(batch.column(j)->data(), indices.data(), options, ctx)); columns[j] = MakeArray(col_data); } return RecordBatch::Make(batch.schema(), nrows, std::move(columns)); } -Result> TakeTA(const Table& table, const Array& indices, - const TakeOptions& options, ExecContext* ctx) { +Result> TakeTAT(const Table& table, const Array& indices, + const TakeOptions& options, ExecContext* ctx) { auto ncols = table.num_columns(); std::vector> columns(ncols); for (int j = 0; j < ncols; j++) { - ARROW_ASSIGN_OR_RAISE(columns[j], TakeCA(*table.column(j), indices, options, ctx)); + ARROW_ASSIGN_OR_RAISE(columns[j], TakeCAC(*table.column(j), indices, options, ctx)); } return Table::Make(table.schema(), std::move(columns)); } -Result> TakeTC(const Table& table, const ChunkedArray& indices, - const TakeOptions& options, ExecContext* ctx) { +Result> TakeTCT(const Table& table, const ChunkedArray& indices, + const TakeOptions& options, ExecContext* ctx) { auto ncols = table.num_columns(); std::vector> columns(ncols); for (int j = 0; j < ncols; j++) { - ARROW_ASSIGN_OR_RAISE(columns[j], TakeCC(*table.column(j), indices, options, ctx)); + ARROW_ASSIGN_OR_RAISE(columns[j], TakeCCC(*table.column(j), indices, options, ctx)); } return Table::Make(table.schema(), std::move(columns)); } @@ -815,29 +825,29 @@ class TakeMetaFunction : public MetaFunction { switch 
(args[0].kind()) { case Datum::ARRAY: if (index_kind == Datum::ARRAY) { - return TakeAA(args[0].array(), args[1].array(), take_opts, ctx); + return TakeAAA(args[0].array(), args[1].array(), take_opts, ctx); } else if (index_kind == Datum::CHUNKED_ARRAY) { - return TakeAC(*args[0].make_array(), *args[1].chunked_array(), take_opts, ctx); + return TakeACC(*args[0].make_array(), *args[1].chunked_array(), take_opts, ctx); } break; case Datum::CHUNKED_ARRAY: if (index_kind == Datum::ARRAY) { - return TakeCA(*args[0].chunked_array(), *args[1].make_array(), take_opts, ctx); + return TakeCAC(*args[0].chunked_array(), *args[1].make_array(), take_opts, ctx); } else if (index_kind == Datum::CHUNKED_ARRAY) { - return TakeCC(*args[0].chunked_array(), *args[1].chunked_array(), take_opts, - ctx); + return TakeCCC(*args[0].chunked_array(), *args[1].chunked_array(), take_opts, + ctx); } break; case Datum::RECORD_BATCH: if (index_kind == Datum::ARRAY) { - return TakeRA(*args[0].record_batch(), *args[1].make_array(), take_opts, ctx); + return TakeRAR(*args[0].record_batch(), *args[1].make_array(), take_opts, ctx); } break; case Datum::TABLE: if (index_kind == Datum::ARRAY) { - return TakeTA(*args[0].table(), *args[1].make_array(), take_opts, ctx); + return TakeTAT(*args[0].table(), *args[1].make_array(), take_opts, ctx); } else if (index_kind == Datum::CHUNKED_ARRAY) { - return TakeTC(*args[0].table(), *args[1].chunked_array(), take_opts, ctx); + return TakeTCT(*args[0].table(), *args[1].chunked_array(), take_opts, ctx); } break; default: diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index d3914173b65aa..db2023ef04cad 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -156,25 +156,26 @@ class ChunkedArraySorter : public TypeVisitor { template void MergeNonNulls(uint64_t* range_begin, uint64_t* range_middle, uint64_t* range_end, const std::vector& arrays, uint64_t* temp_indices) { + using ArrowType = typename ArrayType::TypeClass; const ChunkedArrayResolver left_resolver(arrays); const ChunkedArrayResolver right_resolver(arrays); if (order_ == SortOrder::Ascending) { std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, [&](uint64_t left, uint64_t right) { - const auto chunk_left = left_resolver.Resolve(left); - const auto chunk_right = right_resolver.Resolve(right); - return chunk_left.Value() < chunk_right.Value(); + const auto chunk_left = left_resolver.Resolve(left); + const auto chunk_right = right_resolver.Resolve(right); + return chunk_left.Value() < chunk_right.Value(); }); } else { std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, [&](uint64_t left, uint64_t right) { - const auto chunk_left = left_resolver.Resolve(left); - const auto chunk_right = right_resolver.Resolve(right); + const auto chunk_left = left_resolver.Resolve(left); + const auto chunk_right = right_resolver.Resolve(right); // We don't use 'left > right' here to reduce required // operator. If we use 'right < left' here, '<' is only // required. - return chunk_right.Value() < chunk_left.Value(); + return chunk_right.Value() < chunk_left.Value(); }); } // Copy back temp area into main buffer @@ -647,8 +648,7 @@ class TableSorter { Status SortInternal() { // Sort each batch independently and merge to sorted indices. 
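A note on the Take helpers renamed above: the three-letter suffix spells out the kinds of values, indices, and result (A = Array, C = ChunkedArray, R = RecordBatch, T = Table), so TakeCAC reads as chunked values, array indices, chunked result. The chunked paths reduce to the array kernel by concatenating the value chunks once up front; a minimal sketch of that strategy using public compute APIs (TakeChunkedSketch is an illustrative name, and the empty-chunk edge case handled by the real code is omitted):

    // Sketch only (assumes <arrow/api.h> and <arrow/compute/api.h>).
    using arrow::ArrayVector;
    using arrow::ChunkedArray;
    using arrow::Datum;
    using arrow::MemoryPool;

    arrow::Result<std::shared_ptr<ChunkedArray>> TakeChunkedSketch(
        const ChunkedArray& values, const ChunkedArray& indices, MemoryPool* pool) {
      // Pay the concatenation cost once; every index chunk then reuses it.
      ARROW_ASSIGN_OR_RAISE(auto flat_values, arrow::Concatenate(values.chunks(), pool));
      ArrayVector out_chunks(indices.num_chunks());
      for (int i = 0; i < indices.num_chunks(); ++i) {
        ARROW_ASSIGN_OR_RAISE(
            Datum result, arrow::compute::CallFunction(
                              "array_take", {flat_values, indices.chunk(i)}));
        out_chunks[i] = result.make_array();
      }
      return std::make_shared<ChunkedArray>(std::move(out_chunks), values.type());
    }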
- ARROW_ASSIGN_OR_RAISE(RecordBatchVector batches, BatchesFromTable(table_)); - const int64_t num_batches = static_cast(batches.size()); + const int64_t num_batches = static_cast(batches_.size()); if (num_batches == 0) { return Status::OK(); } @@ -659,7 +659,7 @@ class TableSorter { int64_t end_offset = 0; int64_t null_count = 0; for (int64_t i = 0; i < num_batches; ++i) { - const auto& batch = *batches[i]; + const auto& batch = *batches_[i]; end_offset += batch.num_rows(); RadixRecordBatchSorter sorter(indices_begin_ + begin_offset, indices_begin_ + end_offset, batch, options_); @@ -744,8 +744,6 @@ class TableSorter { uint64_t* nulls_end, uint64_t* temp_indices, int64_t null_count) { - using ArrayType = typename TypeTraits::ArrayType; - auto& comparator = comparator_; const auto& first_sort_key = sort_keys_[0]; @@ -755,11 +753,11 @@ class TableSorter { [&](uint64_t left, uint64_t right) { // First column is either null or nan left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, left_loc.chunk_index); - right_loc = right_resolver_.ResolveWithChunkIndexHint( - right, right_loc.chunk_index); - auto chunk_left = first_sort_key.GetChunk(left_loc); - auto chunk_right = first_sort_key.GetChunk(right_loc); + left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); + right_loc = + right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + auto chunk_left = first_sort_key.GetChunk(left_loc); + auto chunk_right = first_sort_key.GetChunk(right_loc); const auto left_is_null = chunk_left.IsNull(); const auto right_is_null = chunk_right.IsNull(); if (left_is_null == right_is_null) { @@ -794,9 +792,9 @@ class TableSorter { [&](uint64_t left, uint64_t right) { // First column is always null left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, left_loc.chunk_index); - right_loc = right_resolver_.ResolveWithChunkIndexHint( - right, right_loc.chunk_index); + left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); + right_loc = + right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); return comparator.Compare(left_loc, right_loc, 1); }); // Copy back temp area into main buffer @@ -811,8 +809,6 @@ class TableSorter { uint64_t* range_middle, uint64_t* range_end, uint64_t* temp_indices) { - using ArrayType = typename TypeTraits::ArrayType; - auto& comparator = comparator_; const auto& first_sort_key = sort_keys_[0]; @@ -822,15 +818,15 @@ class TableSorter { [&](uint64_t left, uint64_t right) { // Both values are never null nor NaN. 
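For context on the ResolveWithChunkIndexHint rewrites in this merge code: merge comparisons visit indices that tend to stay within one chunk for long runs, so seeding each resolution with the previously returned ChunkLocation makes the common case a constant-time range check. A self-contained sketch of the idea, assuming a simple offsets-based resolver rather than Arrow's actual ChunkResolver:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct ChunkLocation {
      int64_t chunk_index;
      int64_t index_in_chunk;
    };

    // offsets[i] is the logical start of chunk i; offsets.back() is the total
    // length. The hint is the location returned by the previous call.
    ChunkLocation ResolveWithHint(const std::vector<int64_t>& offsets,
                                  int64_t index, ChunkLocation hint) {
      const int64_t c = hint.chunk_index;
      if (index >= offsets[c] && index < offsets[c + 1]) {
        return {c, index - offsets[c]};  // fast path: same chunk as last time
      }
      // Slow path: binary search over the chunk offsets.
      auto it = std::upper_bound(offsets.begin(), offsets.end(), index);
      const int64_t chunk = (it - offsets.begin()) - 1;
      return {chunk, index - offsets[chunk]};
    }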
left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, left_loc.chunk_index); - right_loc = right_resolver_.ResolveWithChunkIndexHint( - right, right_loc.chunk_index); - auto chunk_left = first_sort_key.GetChunk(left_loc); - auto chunk_right = first_sort_key.GetChunk(right_loc); + left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); + right_loc = + right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + auto chunk_left = first_sort_key.GetChunk(left_loc); + auto chunk_right = first_sort_key.GetChunk(right_loc); DCHECK(!chunk_left.IsNull()); DCHECK(!chunk_right.IsNull()); - auto value_left = chunk_left.Value(); - auto value_right = chunk_right.Value(); + auto value_left = chunk_left.Value(); + auto value_right = chunk_right.Value(); if (value_left == value_right) { // If the left value equals to the right value, // we need to compare the second and following diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h b/cpp/src/arrow/compute/kernels/vector_sort_internal.h index d7e5575c80765..564afb8c087d2 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h @@ -278,13 +278,13 @@ NullPartitionResult PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indice Partitioner partitioner; if (null_placement == NullPlacement::AtStart) { auto nulls_end = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); + const auto chunk = resolver.Resolve(ind); return chunk.IsNull(); }); return NullPartitionResult::NullsAtStart(indices_begin, indices_end, nulls_end); } else { auto nulls_begin = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); + const auto chunk = resolver.Resolve(ind); return !chunk.IsNull(); }); return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, nulls_begin); @@ -299,22 +299,22 @@ PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end, return NullPartitionResult::NoNulls(indices_begin, indices_end, null_placement); } -template -enable_if_t::value, - NullPartitionResult> +template +enable_if_t::value, NullPartitionResult> PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end, const ChunkedArrayResolver& resolver, NullPlacement null_placement) { Partitioner partitioner; if (null_placement == NullPlacement::AtStart) { auto null_likes_end = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return std::isnan(chunk.Value()); + const auto chunk = resolver.Resolve(ind); + return std::isnan(chunk.Value()); }); return NullPartitionResult::NullsAtStart(indices_begin, indices_end, null_likes_end); } else { auto null_likes_begin = partitioner(indices_begin, indices_end, [&](uint64_t ind) { - const auto chunk = resolver.Resolve(ind); - return !std::isnan(chunk.Value()); + const auto chunk = resolver.Resolve(ind); + return !std::isnan(chunk.Value()); }); return NullPartitionResult::NullsAtEnd(indices_begin, indices_end, null_likes_begin); } @@ -595,7 +595,6 @@ struct ColumnComparator { template struct ConcreteColumnComparator : public ColumnComparator { - using ArrayType = typename TypeTraits::ArrayType; using Location = typename ResolvedSortKey::LocationType; using ColumnComparator::ColumnComparator; @@ -603,8 +602,8 @@ struct ConcreteColumnComparator : public ColumnComparator { int Compare(const Location& left, const Location& right) const override { const auto& sort_key = this->sort_key_; - const 
auto chunk_left = sort_key.template GetChunk(left); - const auto chunk_right = sort_key.template GetChunk(right); + const auto chunk_left = sort_key.GetChunk(left); + const auto chunk_right = sort_key.GetChunk(right); if (sort_key.null_count > 0) { const bool is_null_left = chunk_left.IsNull(); const bool is_null_right = chunk_right.IsNull(); @@ -616,8 +615,9 @@ struct ConcreteColumnComparator : public ColumnComparator { return this->null_placement_ == NullPlacement::AtStart ? 1 : -1; } } - return CompareTypeValues(chunk_left.Value(), chunk_right.Value(), - sort_key.order, this->null_placement_); + return CompareTypeValues(chunk_left.template Value(), + chunk_right.template Value(), sort_key.order, + this->null_placement_); } }; @@ -731,10 +731,7 @@ struct ResolvedRecordBatchSortKey { using LocationType = int64_t; - template - ResolvedChunk GetChunk(int64_t index) const { - return {&::arrow::internal::checked_cast(array), index}; - } + ResolvedChunk GetChunk(int64_t index) const { return {&array, index}; } const std::shared_ptr type; std::shared_ptr owned_array; @@ -754,9 +751,8 @@ struct ResolvedTableSortKey { using LocationType = ::arrow::internal::ChunkLocation; - template - ResolvedChunk GetChunk(::arrow::internal::ChunkLocation loc) const { - return {checked_cast(chunks[loc.chunk_index]), loc.index_in_chunk}; + ResolvedChunk GetChunk(::arrow::internal::ChunkLocation loc) const { + return {chunks[loc.chunk_index], loc.index_in_chunk}; } // Make a vector of ResolvedSortKeys for the sort keys and the given table. diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index c55143af0cd59..2058ba9f30757 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -32,17 +32,18 @@ using internal::CpuInfo; namespace util { void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { - int64_t old_top = top_; - top_ += PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t); - // Stack overflow check - ARROW_DCHECK(top_ <= buffer_size_); - *data = buffer_->mutable_data() + old_top + sizeof(uint64_t); + int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t); + // Stack overflow check (see GH-39582). + // XXX cannot return a regular Status because most consumers do not either. + ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow"; + *data = buffer_->mutable_data() + top_ + sizeof(uint64_t); // We set 8 bytes before the beginning of the allocated range and // 8 bytes after the end to check for stack overflow (which would // result in those known bytes being corrupted). 
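To make the next hunk easier to follow: each stack entry is bracketed by two 8-byte guard words, and the bug was that top_ was advanced before the overflow check, so a failing check could come after guard writes had already gone out of bounds. A self-contained sketch of the corrected ordering (plain pointers instead of Arrow's Buffer; the guard values and padding rule are illustrative):

    #include <cassert>
    #include <cstdint>

    constexpr uint64_t kGuard1 = 0x3141592653589793ULL;  // illustrative values
    constexpr uint64_t kGuard2 = 0x0577215664901532ULL;

    // Allocate num_bytes from a guard-framed stack: compute the new top first,
    // check it, and only then write the guards and commit the bump.
    uint8_t* StackAlloc(uint8_t* base, int64_t capacity, int64_t* top,
                        int64_t num_bytes) {
      const int64_t padded = (num_bytes + 7) & ~int64_t{7};  // 8-byte padding
      const int64_t new_top =
          *top + padded + 2 * static_cast<int64_t>(sizeof(uint64_t));
      assert(new_top <= capacity);  // fail before touching out-of-range memory
      uint8_t* data = base + *top + sizeof(uint64_t);
      reinterpret_cast<uint64_t*>(base + *top)[0] = kGuard1;      // front guard
      reinterpret_cast<uint64_t*>(base + new_top)[-1] = kGuard2;  // back guard
      *top = new_top;  // committed only after the check succeeds
      return data;
    }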
- reinterpret_cast<uint64_t*>(buffer_->mutable_data() + old_top)[0] = kGuard1; - reinterpret_cast<uint64_t*>(buffer_->mutable_data() + top_)[-1] = kGuard2; + reinterpret_cast<uint64_t*>(buffer_->mutable_data() + top_)[0] = kGuard1; + reinterpret_cast<uint64_t*>(buffer_->mutable_data() + new_top)[-1] = kGuard2; *id = num_vectors_++; + top_ = new_top; } void TempVectorStack::release(int id, uint32_t num_bytes) { diff --git a/cpp/src/arrow/dataset/CMakeLists.txt b/cpp/src/arrow/dataset/CMakeLists.txt index 1afef3e3b0463..e48bcfaf65bcb 100644 --- a/cpp/src/arrow/dataset/CMakeLists.txt +++ b/cpp/src/arrow/dataset/CMakeLists.txt @@ -38,6 +38,7 @@ endif() set(ARROW_DATASET_STATIC_LINK_LIBS) set(ARROW_DATASET_SHARED_LINK_LIBS) +set(ARROW_DATASET_SHARED_PRIVATE_LINK_LIBS) set(ARROW_DATASET_STATIC_INSTALL_INTERFACE_LIBS) set(ARROW_DATASET_SHARED_INSTALL_INTERFACE_LIBS) @@ -65,9 +66,14 @@ endif() list(APPEND ARROW_DATASET_STATIC_INSTALL_INTERFACE_LIBS ArrowAcero::arrow_acero_static) list(APPEND ARROW_DATASET_SHARED_INSTALL_INTERFACE_LIBS ArrowAcero::arrow_acero_shared) -list(APPEND ARROW_DATASET_STATIC_LINK_LIBS arrow_acero_static ${ARROW_STATIC_LINK_LIBS}) +list(APPEND ARROW_DATASET_STATIC_LINK_LIBS arrow_acero_static) list(APPEND ARROW_DATASET_SHARED_LINK_LIBS arrow_acero_shared) +if(ARROW_WITH_OPENTELEMETRY) + list(APPEND ARROW_DATASET_STATIC_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) + list(APPEND ARROW_DATASET_SHARED_PRIVATE_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) +endif() + add_arrow_lib(arrow_dataset CMAKE_PACKAGE_NAME ArrowDataset @@ -79,12 +85,12 @@ add_arrow_lib(arrow_dataset ${ARROW_DATASET_SRCS} PRECOMPILED_HEADERS "$<$<COMPILE_LANGUAGE:CXX>:arrow/dataset/pch.h>" - DEPENDENCIES - toolchain PRIVATE_INCLUDES ${ARROW_DATASET_PRIVATE_INCLUDES} SHARED_LINK_LIBS ${ARROW_DATASET_SHARED_LINK_LIBS} + SHARED_PRIVATE_LINK_LIBS + ${ARROW_DATASET_SHARED_PRIVATE_LINK_LIBS} SHARED_INSTALL_INTERFACE_LIBS ${ARROW_DATASET_SHARED_INSTALL_INTERFACE_LIBS} STATIC_LINK_LIBS @@ -121,7 +127,7 @@ endif() function(ADD_ARROW_DATASET_TEST REL_TEST_NAME) set(options) set(one_value_args PREFIX) - set(multi_value_args LABELS) + set(multi_value_args EXTRA_LINK_LIBS LABELS) cmake_parse_arguments(ARG "${options}" "${one_value_args}" @@ -134,6 +140,12 @@ function(ADD_ARROW_DATASET_TEST REL_TEST_NAME) set(PREFIX "arrow-dataset") endif() + if(ARG_EXTRA_LINK_LIBS) + set(EXTRA_LINK_LIBS ${ARG_EXTRA_LINK_LIBS}) + else() + set(EXTRA_LINK_LIBS ${ARROW_DATASET_TEST_LINK_LIBS}) + endif() + if(ARG_LABELS) set(LABELS ${ARG_LABELS}) else() @@ -142,7 +154,7 @@ endif() add_arrow_test(${REL_TEST_NAME} EXTRA_LINK_LIBS - ${ARROW_DATASET_TEST_LINK_LIBS} + ${EXTRA_LINK_LIBS} PREFIX ${PREFIX} LABELS @@ -165,11 +177,13 @@ if(ARROW_CSV) endif() if(ARROW_JSON) - add_arrow_dataset_test(file_json_test) + add_arrow_dataset_test(file_json_test EXTRA_LINK_LIBS ${ARROW_DATASET_TEST_LINK_LIBS} + RapidJSON) endif() if(ARROW_ORC) - add_arrow_dataset_test(file_orc_test) + add_arrow_dataset_test(file_orc_test EXTRA_LINK_LIBS ${ARROW_DATASET_TEST_LINK_LIBS} + orc::orc) endif() if(ARROW_PARQUET) diff --git a/cpp/src/arrow/dataset/dataset_writer.cc b/cpp/src/arrow/dataset/dataset_writer.cc index ae9fb36484bb6..34731d19ab3eb 100644 --- a/cpp/src/arrow/dataset/dataset_writer.cc +++ b/cpp/src/arrow/dataset/dataset_writer.cc @@ -610,7 +610,16 @@ class DatasetWriter::DatasetWriterImpl { bool will_open_file = false; ARROW_ASSIGN_OR_RAISE(auto next_chunk, dir_queue->NextWritableChunk( batch, &remainder, &will_open_file)); - + // GH-39965: `NextWritableChunk` may return an empty batch to signal + //
that the current file has reached `max_rows_per_file` and should be + // finished. + if (next_chunk->num_rows() == 0) { + batch = std::move(remainder); + if (batch) { + RETURN_NOT_OK(dir_queue->FinishCurrentFile()); + } + continue; + } backpressure = writer_state_.rows_in_flight_throttle.Acquire(next_chunk->num_rows()); if (!backpressure.is_finished()) { diff --git a/cpp/src/arrow/dataset/dataset_writer_test.cc b/cpp/src/arrow/dataset/dataset_writer_test.cc index 1ac0ec3f39e97..871b6ef6f5507 100644 --- a/cpp/src/arrow/dataset/dataset_writer_test.cc +++ b/cpp/src/arrow/dataset/dataset_writer_test.cc @@ -189,8 +189,7 @@ class DatasetWriterTestFixture : public testing::Test { } } - void AssertCreatedData(const std::vector& expected_files, - bool check_num_record_batches = true) { + void AssertCreatedData(const std::vector& expected_files) { counter_ = 0; for (const auto& expected_file : expected_files) { std::optional written_file = FindFile(expected_file.filename); @@ -198,9 +197,7 @@ class DatasetWriterTestFixture : public testing::Test { int num_batches = 0; AssertBatchesEqual(*MakeBatch(expected_file.start, expected_file.num_rows), *ReadAsBatch(written_file->data, &num_batches)); - if (check_num_record_batches) { - ASSERT_EQ(expected_file.num_record_batches, num_batches); - } + ASSERT_EQ(expected_file.num_record_batches, num_batches); } } @@ -299,9 +296,7 @@ TEST_F(DatasetWriterTestFixture, MaxRowsOneWriteBackpresure) { expected_files.emplace_back("testdir/chunk-" + std::to_string(i) + ".arrow", kFileSizeLimit * i, kFileSizeLimit); } - // Not checking the number of record batches because file may contain the - // zero-length record batch. - AssertCreatedData(expected_files, /*check_num_record_batches=*/false); + AssertCreatedData(expected_files); } TEST_F(DatasetWriterTestFixture, MaxRowsOneWriteWithFunctor) { @@ -348,6 +343,23 @@ TEST_F(DatasetWriterTestFixture, MaxRowsManyWrites) { {{"testdir/chunk-0.arrow", 0, 10, 4}, {"testdir/chunk-1.arrow", 10, 8, 3}}); } +TEST_F(DatasetWriterTestFixture, NotProduceZeroSizedBatch) { + // GH-39965: avoid creating zero-sized batch when max_rows_per_file enabled. 
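+ // With two 20-row writes and max_rows_per_file=10, the writer should emit
+ // exactly four 10-row files, each holding a single record batch; before the
+ // fix a trailing zero-sized batch (and a spurious chunk-4 file) could
+ // appear, which the AssertNotFiles check below guards against.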
+ write_options_.max_rows_per_file = 10; + write_options_.max_rows_per_group = 10; + auto dataset_writer = MakeDatasetWriter(); + dataset_writer->WriteRecordBatch(MakeBatch(20), ""); + dataset_writer->WriteRecordBatch(MakeBatch(20), ""); + EndWriterChecked(dataset_writer.get()); + AssertCreatedData({ + {"testdir/chunk-0.arrow", 0, 10, 1}, + {"testdir/chunk-1.arrow", 10, 10, 1}, + {"testdir/chunk-2.arrow", 20, 10, 1}, + {"testdir/chunk-3.arrow", 30, 10, 1}, + }); + AssertNotFiles({"testdir/chunk-4.arrow"}); +} + TEST_F(DatasetWriterTestFixture, MinRowGroup) { write_options_.min_rows_per_group = 20; auto dataset_writer = MakeDatasetWriter(); diff --git a/cpp/src/arrow/dataset/file_csv_test.cc b/cpp/src/arrow/dataset/file_csv_test.cc index 755b202439be6..60a6685dc22fd 100644 --- a/cpp/src/arrow/dataset/file_csv_test.cc +++ b/cpp/src/arrow/dataset/file_csv_test.cc @@ -36,6 +36,7 @@ #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" +#include "arrow/util/config.h" namespace arrow { namespace dataset { diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index 140917a2e6341..c17ba89be7907 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -779,6 +779,11 @@ ParquetFileFragment::ParquetFileFragment(FileSource source, parquet_format_(checked_cast(*format_)), row_groups_(std::move(row_groups)) {} +std::shared_ptr ParquetFileFragment::metadata() { + auto lock = physical_schema_mutex_.Lock(); + return metadata_; +} + Status ParquetFileFragment::EnsureCompleteMetadata(parquet::arrow::FileReader* reader) { auto lock = physical_schema_mutex_.Lock(); if (metadata_ != nullptr) { diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h index 5141f36385e3f..63d8fd729223c 100644 --- a/cpp/src/arrow/dataset/file_parquet.h +++ b/cpp/src/arrow/dataset/file_parquet.h @@ -165,7 +165,7 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment { } /// \brief Return the FileMetaData associated with this fragment. - const std::shared_ptr& metadata() const { return metadata_; } + std::shared_ptr metadata(); /// \brief Ensure this fragment's FileMetaData is in memory. 
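Why metadata() above now returns the shared_ptr by value instead of a const reference: EnsureCompleteMetadata() can assign metadata_ concurrently, so a reference handed out without the lock could be read mid-update. Copying the pointer while holding the same mutex is the usual fix; a minimal self-contained sketch of the pattern (std::mutex and a stand-in Metadata type instead of Arrow's util::Mutex and parquet::FileMetaData):

    #include <memory>
    #include <mutex>

    struct Metadata {};  // stand-in for parquet::FileMetaData

    class FragmentLike {
     public:
      // Copy the shared_ptr while holding the lock; the refcount then keeps
      // the object alive even if another thread swaps in new metadata.
      std::shared_ptr<Metadata> metadata() {
        std::lock_guard<std::mutex> lock(mutex_);
        return metadata_;
      }

      void SetMetadata(std::shared_ptr<Metadata> m) {
        std::lock_guard<std::mutex> lock(mutex_);
        metadata_ = std::move(m);
      }

     private:
      std::mutex mutex_;
      std::shared_ptr<Metadata> metadata_;
    };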
Status EnsureCompleteMetadata(parquet::arrow::FileReader* reader = NULLPTR); diff --git a/cpp/src/arrow/engine/simple_extension_type_internal.h b/cpp/src/arrow/engine/simple_extension_type_internal.h index c3f0226283d5f..73dbb9f7cb78d 100644 --- a/cpp/src/arrow/engine/simple_extension_type_internal.h +++ b/cpp/src/arrow/engine/simple_extension_type_internal.h @@ -70,8 +70,9 @@ class SimpleExtensionType : public ExtensionType { std::string extension_name() const override { return std::string(kExtensionName); } - std::string ToString() const override { return "extension<" + this->Serialize() + ">"; } - + std::string ToString(bool show_metadata = false) const override { + return "extension<" + this->Serialize() + ">"; + } /// \brief A comparator which returns true iff all parameter properties are equal struct ExtensionEqualsImpl { ExtensionEqualsImpl(const Params& l, const Params& r) : left_(l), right_(r) { diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.cc b/cpp/src/arrow/extension/fixed_shape_tensor.cc index 02e0a890e4b3d..1101b08307332 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor.cc +++ b/cpp/src/arrow/extension/fixed_shape_tensor.cc @@ -108,10 +108,10 @@ bool FixedShapeTensorType::ExtensionEquals(const ExtensionType& other) const { permutation_equivalent; } -std::string FixedShapeTensorType::ToString() const { +std::string FixedShapeTensorType::ToString(bool show_metadata) const { std::stringstream ss; ss << "extension<" << this->extension_name() - << "[value_type=" << value_type_->ToString() + << "[value_type=" << value_type_->ToString(show_metadata) << ", shape=" << ::arrow::internal::PrintVector{shape_, ","}; if (!permutation_.empty()) { diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.h b/cpp/src/arrow/extension/fixed_shape_tensor.h index 591a7cee32a34..3fec79b5c2a3c 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor.h +++ b/cpp/src/arrow/extension/fixed_shape_tensor.h @@ -61,7 +61,7 @@ class ARROW_EXPORT FixedShapeTensorType : public ExtensionType { dim_names_(dim_names) {} std::string extension_name() const override { return "arrow.fixed_shape_tensor"; } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; /// Number of dimensions of tensor elements size_t ndim() const { return shape_.size(); } diff --git a/cpp/src/arrow/extension_type.cc b/cpp/src/arrow/extension_type.cc index 1199336763ddb..cf8dda7a85df4 100644 --- a/cpp/src/arrow/extension_type.cc +++ b/cpp/src/arrow/extension_type.cc @@ -41,7 +41,7 @@ using internal::checked_cast; DataTypeLayout ExtensionType::layout() const { return storage_type_->layout(); } -std::string ExtensionType::ToString() const { +std::string ExtensionType::ToString(bool show_metadata) const { std::stringstream ss; ss << "extension<" << this->extension_name() << ">"; return ss.str(); diff --git a/cpp/src/arrow/extension_type.h b/cpp/src/arrow/extension_type.h index dd004118e83c9..0fd7216f1820b 100644 --- a/cpp/src/arrow/extension_type.h +++ b/cpp/src/arrow/extension_type.h @@ -50,7 +50,7 @@ class ARROW_EXPORT ExtensionType : public DataType { DataTypeLayout layout() const override; - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "extension"; } diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index a42a8d0f8c1b6..b9ed11e7608f3 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ 
b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -43,6 +43,7 @@ if(ARROW_GCS) EXTRA_LABELS filesystem EXTRA_LINK_LIBS + google-cloud-cpp::storage Boost::filesystem Boost::system) endif() @@ -52,6 +53,7 @@ if(ARROW_AZURE) EXTRA_LABELS filesystem EXTRA_LINK_LIBS + ${AZURE_SDK_LINK_LIBRARIES} Boost::filesystem Boost::system) endif() @@ -64,6 +66,7 @@ if(ARROW_S3) EXTRA_LABELS filesystem EXTRA_LINK_LIBS + ${AWSSDK_LINK_LIBRARIES} Boost::filesystem Boost::system) if(TARGET arrow-s3fs-test) @@ -71,17 +74,13 @@ if(ARROW_S3) get_target_property(AWS_CPP_SDK_S3_TYPE aws-cpp-sdk-s3 TYPE) # We need to initialize AWS C++ SDK for direct use (not via # arrow::fs::S3FileSystem) in arrow-s3fs-test if we use static AWS - # C++ SDK. Because AWS C++ SDK has internal static variables that - # aren't shared in libarrow and arrow-s3fs-test. It means that - # arrow::fs::InitializeS3() doesn't initialize AWS C++ SDK that is - # directly used in arrow-s3fs-test. - # - # But it seems that internal static variables in AWS C++ SDK are - # shared on macOS even if we link static AWS C++ SDK to both - # libarrow and arrow-s3fs-test. So we don't need to initialize AWS - # C++ SDK in arrow-s3fs-test on macOS. - if(AWS_CPP_SDK_S3_TYPE STREQUAL "STATIC_LIBRARY" AND NOT APPLE) - list(APPEND ARROW_S3FS_TEST_COMPILE_DEFINITIONS "AWS_CPP_SDK_S3_NOT_SHARED") + # C++ SDK and hide symbols of them. Because AWS C++ SDK has + # internal static variables that aren't shared in libarrow and + # arrow-s3fs-test. It means that arrow::fs::InitializeS3() doesn't + # initialize AWS C++ SDK that is directly used in arrow-s3fs-test. + if(AWS_CPP_SDK_S3_TYPE STREQUAL "STATIC_LIBRARY" + AND CXX_LINKER_SUPPORTS_VERSION_SCRIPT) + list(APPEND ARROW_S3FS_TEST_COMPILE_DEFINITIONS "AWS_CPP_SDK_S3_PRIVATE_STATIC") endif() target_compile_definitions(arrow-s3fs-test PRIVATE ${ARROW_S3FS_TEST_COMPILE_DEFINITIONS}) @@ -102,6 +101,7 @@ if(ARROW_S3) s3fs_benchmark.cc s3_test_util.cc STATIC_LINK_LIBS + ${AWSSDK_LINK_LIBRARIES} ${ARROW_BENCHMARK_LINK_LIBS} Boost::filesystem Boost::system) diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index a987d63a6d247..5fefe6b7cb016 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -2898,12 +2898,16 @@ struct AwsInstance { if (is_finalized_.load()) { return Status::Invalid("Attempt to initialize S3 after it has been finalized"); } - if (!is_initialized_.exchange(true)) { - // Not already initialized + bool newly_initialized = false; + // EnsureInitialized() can be called concurrently by FileSystemFromUri, + // therefore we need to serialize initialization (GH-39897). 
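The hunk below swaps a bare atomic exchange for std::call_once, so a thread that loses the race blocks until DoInitialize() has finished instead of returning early while initialization is still in progress. A self-contained sketch of the idiom, with a stand-in OnceInit class:

    #include <atomic>
    #include <mutex>

    class OnceInit {
     public:
      // Returns true only for the call that actually performed initialization;
      // concurrent callers block inside call_once until that call completes.
      bool EnsureInitialized() {
        bool newly_initialized = false;
        std::call_once(flag_, [&] {
          initialized_.store(true);
          // ... expensive one-time setup would run here ...
          newly_initialized = true;
        });
        return newly_initialized;
      }

      bool IsInitialized() const { return initialized_.load(); }

     private:
      std::once_flag flag_;
      std::atomic<bool> initialized_{false};
    };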
+ std::call_once(initialize_flag_, [&]() { + bool was_initialized = is_initialized_.exchange(true); + DCHECK(!was_initialized); DoInitialize(options); - return true; - } - return false; + newly_initialized = true; + }); + return newly_initialized; } bool IsInitialized() { return !is_finalized_ && is_initialized_; } @@ -2979,6 +2983,7 @@ struct AwsInstance { Aws::SDKOptions aws_options_; std::atomic is_initialized_; std::atomic is_finalized_; + std::once_flag initialize_flag_; }; AwsInstance* GetAwsInstance() { diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 33e9712a666cd..ad7aaa1bd43cf 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -150,7 +150,7 @@ class ShortRetryStrategy : public S3RetryStrategy { class AwsTestMixin : public ::testing::Test { public: void SetUp() override { -#ifdef AWS_CPP_SDK_S3_NOT_SHARED +#ifdef AWS_CPP_SDK_S3_PRIVATE_STATIC auto aws_log_level = Aws::Utils::Logging::LogLevel::Fatal; aws_options_.loggingOptions.logLevel = aws_log_level; aws_options_.loggingOptions.logger_create_fn = [&aws_log_level] { @@ -161,13 +161,13 @@ class AwsTestMixin : public ::testing::Test { } void TearDown() override { -#ifdef AWS_CPP_SDK_S3_NOT_SHARED +#ifdef AWS_CPP_SDK_S3_PRIVATE_STATIC Aws::ShutdownAPI(aws_options_); #endif } private: -#ifdef AWS_CPP_SDK_S3_NOT_SHARED +#ifdef AWS_CPP_SDK_S3_PRIVATE_STATIC Aws::SDKOptions aws_options_; #endif }; @@ -190,8 +190,11 @@ class S3TestMixin : public AwsTestMixin { } void TearDown() override { - client_.reset(); // Aws::S3::S3Client destruction relies on AWS SDK, so it must be - // reset before Aws::ShutdownAPI + // Aws::S3::S3Client destruction relies on AWS SDK, so it must be + // reset before Aws::ShutdownAPI + client_.reset(); + client_config_.reset(); + AwsTestMixin::TearDown(); } @@ -468,6 +471,13 @@ class TestS3FS : public S3TestMixin { } } + void TearDown() override { + // Aws::S3::S3Client destruction relies on AWS SDK, so it must be + // reset before Aws::ShutdownAPI + fs_.reset(); + S3TestMixin::TearDown(); + } + Result> MakeNewFileSystem( io::IOContext io_context = io::default_io_context()) { options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key()); @@ -1359,6 +1369,14 @@ class TestS3FSGeneric : public S3TestMixin, public GenericFileSystemTest { fs_ = std::make_shared("s3fs-test-bucket", s3fs_); } + void TearDown() override { + // Aws::S3::S3Client destruction relies on AWS SDK, so it must be + // reset before Aws::ShutdownAPI + s3fs_.reset(); + fs_.reset(); + S3TestMixin::TearDown(); + } + protected: std::shared_ptr GetEmptyFileSystem() override { return fs_; } diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt index 91e0fbf9136d3..8eba89b8e78a6 100644 --- a/cpp/src/arrow/flight/CMakeLists.txt +++ b/cpp/src/arrow/flight/CMakeLists.txt @@ -20,7 +20,9 @@ add_custom_target(arrow_flight) arrow_install_all_headers("arrow/flight") set(ARROW_FLIGHT_LINK_LIBS gRPC::grpc++ ${ARROW_PROTOBUF_LIBPROTOBUF}) - +if(ARROW_WITH_OPENTELEMETRY) + list(APPEND ARROW_FLIGHT_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) +endif() if(WIN32) list(APPEND ARROW_FLIGHT_LINK_LIBS ws2_32.lib) endif() @@ -66,8 +68,7 @@ list(APPEND ARROW_FLIGHT_TEST_INTERFACE_LIBS Boost::headers Boost::filesystem - Boost::system - ${ARROW_GTEST_GMOCK}) + Boost::system) list(APPEND ARROW_FLIGHT_TEST_LINK_LIBS gRPC::grpc++) # TODO(wesm): Protobuf shared vs static linking @@ -260,9 +261,7 @@ if(ARROW_TESTING) test_definitions.cc test_util.cc 
DEPENDENCIES - ${ARROW_GTEST_GTEST} flight_grpc_gen - arrow_dependencies SHARED_LINK_LIBS ${ARROW_FLIGHT_TESTING_SHARED_LINK_LIBS} SHARED_INSTALL_INTERFACE_LIBS diff --git a/cpp/src/arrow/flight/sql/server_session_middleware.cc b/cpp/src/arrow/flight/sql/server_session_middleware.cc index f3e02de232444..43609ea8cc43c 100644 --- a/cpp/src/arrow/flight/sql/server_session_middleware.cc +++ b/cpp/src/arrow/flight/sql/server_session_middleware.cc @@ -80,7 +80,7 @@ class ServerSessionMiddlewareImpl : public ServerSessionMiddleware { Status CloseSession() override { const std::lock_guard l(mutex_); - if (static_cast<bool>(session_)) { + if (!static_cast<bool>(session_)) { return Status::Invalid("Nonexistent session cannot be closed."); } ARROW_RETURN_NOT_OK(factory_->CloseSession(session_id_)); diff --git a/cpp/src/arrow/gpu/cuda_test.cc b/cpp/src/arrow/gpu/cuda_test.cc index c39dbe28e808a..d2f01cb3bbc0c 100644 --- a/cpp/src/arrow/gpu/cuda_test.cc +++ b/cpp/src/arrow/gpu/cuda_test.cc @@ -23,6 +23,8 @@ #include "gtest/gtest.h" +#include "arrow/c/bridge.h" +#include "arrow/c/util_internal.h" #include "arrow/io/memory.h" #include "arrow/ipc/api.h" #include "arrow/ipc/dictionary.h" @@ -38,8 +40,11 @@ namespace arrow { +using internal::ArrayExportGuard; +using internal::ArrayStreamExportGuard; using internal::checked_cast; using internal::checked_pointer_cast; +using internal::SchemaExportGuard; namespace cuda { @@ -703,5 +708,118 @@ TEST_F(TestCudaArrowIpc, DictionaryWriteRead) { CompareBatch(*batch, *cpu_batch); } +// ------------------------------------------------------------------------ +// Test C Device Interface export/import with CUDA +// (equivalent tests for non-CUDA live in bridge_test.cc) + +class TestCudaDeviceArrayRoundtrip : public ::testing::Test { + public: + using ArrayFactory = std::function<Result<std::shared_ptr<Array>>()>; + + static Result<std::shared_ptr<MemoryManager>> DeviceMapper(ArrowDeviceType type, + int64_t id) { + if (type != ARROW_DEVICE_CUDA) { + return Status::NotImplemented("should only be CUDA device"); + } + + ARROW_ASSIGN_OR_RAISE(auto manager, cuda::CudaDeviceManager::Instance()); + ARROW_ASSIGN_OR_RAISE(auto device, manager->GetDevice(id)); + return device->default_memory_manager(); + } + + static ArrayFactory JSONArrayFactory(std::shared_ptr<DataType> type, const char* json) { + return [=]() { return ArrayFromJSON(type, json); }; + } + + template <typename ArrayFactory> + void TestWithArrayFactory(ArrayFactory&& factory) { + TestWithArrayFactory(factory, factory); + } + + template <typename ArrayFactory, typename ExpectedArrayFactory> + void TestWithArrayFactory(ArrayFactory&& factory, + ExpectedArrayFactory&& factory_expected) { + ASSERT_OK_AND_ASSIGN(auto manager, cuda::CudaDeviceManager::Instance()); + ASSERT_OK_AND_ASSIGN(auto device, manager->GetDevice(0)); + auto mm = device->default_memory_manager(); + + std::shared_ptr<Array> array; + std::shared_ptr<Array> device_array; + ASSERT_OK_AND_ASSIGN(array, factory()); + ASSERT_OK_AND_ASSIGN(device_array, array->CopyTo(mm)); + + struct ArrowDeviceArray c_array {}; + struct ArrowSchema c_schema {}; + ArrayExportGuard array_guard(&c_array.array); + SchemaExportGuard schema_guard(&c_schema); + + ASSERT_OK(ExportType(*device_array->type(), &c_schema)); + std::shared_ptr<Device::SyncEvent> sync{nullptr}; + ASSERT_OK(ExportDeviceArray(*device_array, sync, &c_array)); + + std::shared_ptr<Array> device_array_roundtripped; + ASSERT_OK_AND_ASSIGN(device_array_roundtripped, + ImportDeviceArray(&c_array, &c_schema, DeviceMapper)); + ASSERT_TRUE(ArrowSchemaIsReleased(&c_schema)); + ASSERT_TRUE(ArrowArrayIsReleased(&c_array.array)); + + // Check value of imported array (copy to CPU to assert equality) + std::shared_ptr<Array>
array_roundtripped; + ASSERT_OK_AND_ASSIGN(array_roundtripped, + device_array_roundtripped->CopyTo(default_cpu_memory_manager())); + ASSERT_OK(array_roundtripped->ValidateFull()); + { + std::shared_ptr expected; + ASSERT_OK_AND_ASSIGN(expected, factory_expected()); + AssertTypeEqual(*expected->type(), *array_roundtripped->type()); + AssertArraysEqual(*expected, *array_roundtripped, true); + } + + // Re-export and re-import, now both at once + ASSERT_OK(ExportDeviceArray(*device_array, sync, &c_array, &c_schema)); + device_array_roundtripped.reset(); + ASSERT_OK_AND_ASSIGN(device_array_roundtripped, + ImportDeviceArray(&c_array, &c_schema, DeviceMapper)); + ASSERT_TRUE(ArrowSchemaIsReleased(&c_schema)); + ASSERT_TRUE(ArrowArrayIsReleased(&c_array.array)); + + // Check value of imported array (copy to CPU to assert equality) + array_roundtripped.reset(); + ASSERT_OK_AND_ASSIGN(array_roundtripped, + device_array_roundtripped->CopyTo(default_cpu_memory_manager())); + ASSERT_OK(array_roundtripped->ValidateFull()); + { + std::shared_ptr expected; + ASSERT_OK_AND_ASSIGN(expected, factory_expected()); + AssertTypeEqual(*expected->type(), *array_roundtripped->type()); + AssertArraysEqual(*expected, *array_roundtripped, true); + } + } + + void TestWithJSON(std::shared_ptr type, const char* json) { + TestWithArrayFactory(JSONArrayFactory(type, json)); + } +}; + +TEST_F(TestCudaDeviceArrayRoundtrip, Primitive) { TestWithJSON(int32(), "[4, 5, null]"); } + +TEST_F(TestCudaDeviceArrayRoundtrip, Struct) { + auto type = struct_({field("ints", int16()), field("strs", utf8())}); + + TestWithJSON(type, "[]"); + TestWithJSON(type, R"([[4, "foo"], [5, "bar"]])"); + TestWithJSON(type, R"([[4, null], null, [5, "foo"]])"); +} + +TEST_F(TestCudaDeviceArrayRoundtrip, Dictionary) { + auto factory = []() { + auto values = ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + auto indices = ArrayFromJSON(uint16(), "[0, 2, 1, null, 1]"); + return DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), + indices, values); + }; + TestWithArrayFactory(factory); +} + } // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/integration/CMakeLists.txt b/cpp/src/arrow/integration/CMakeLists.txt index 0d7b7e5a7d47d..350487052de1b 100644 --- a/cpp/src/arrow/integration/CMakeLists.txt +++ b/cpp/src/arrow/integration/CMakeLists.txt @@ -21,12 +21,15 @@ arrow_install_all_headers("arrow/integration") # - an executable that can be called to answer integration test requests # - a self-(unit)test for the C++ side of integration testing if(ARROW_BUILD_TESTS) - add_arrow_test(json_integration_test EXTRA_LINK_LIBS ${GFLAGS_LIBRARIES}) + add_arrow_test(json_integration_test EXTRA_LINK_LIBS RapidJSON ${GFLAGS_LIBRARIES}) add_dependencies(arrow-integration arrow-json-integration-test) elseif(ARROW_BUILD_INTEGRATION) add_executable(arrow-json-integration-test json_integration_test.cc) - target_link_libraries(arrow-json-integration-test ${ARROW_TEST_LINK_LIBS} - ${GFLAGS_LIBRARIES} ${ARROW_GTEST_GTEST}) + target_link_libraries(arrow-json-integration-test + RapidJSON + ${ARROW_TEST_LINK_LIBS} + ${GFLAGS_LIBRARIES} + ${ARROW_GTEST_GTEST}) add_dependencies(arrow-json-integration-test arrow arrow_testing) add_dependencies(arrow-integration arrow-json-integration-test) diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt index d8224192ce0fb..041d511083457 100644 --- a/cpp/src/arrow/io/CMakeLists.txt +++ b/cpp/src/arrow/io/CMakeLists.txt @@ -28,6 +28,7 @@ if(ARROW_HDFS) PREFIX "arrow-io" 
EXTRA_LINK_LIBS + arrow::hadoop Boost::filesystem Boost::system) endif() diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index 3b18bb7b0f0f4..00426f9957b1f 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -572,7 +572,7 @@ class MemoryMappedFile::MemoryMap if (static_cast(static_cast(mmap_length)) != mmap_length) { return Status::CapacityError("Requested memory map length ", mmap_length, " does not fit in a C size_t " - "(are you using a 32-bit build of Arrow?"); + "(are you using a 32-bit build of Arrow?)"); } void* result = mmap(nullptr, static_cast(mmap_length), prot_flags_, map_mode_, diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 4b62bdc3a77f2..8b7eee495808b 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -18,12 +18,10 @@ # # Messaging and interprocess communication -add_custom_target(arrow_ipc) - function(ADD_ARROW_IPC_TEST REL_TEST_NAME) set(options) set(one_value_args PREFIX) - set(multi_value_args LABELS) + set(multi_value_args) cmake_parse_arguments(ARG "${options}" "${one_value_args}" @@ -36,17 +34,12 @@ function(ADD_ARROW_IPC_TEST REL_TEST_NAME) set(PREFIX "arrow-ipc") endif() - add_arrow_test(${REL_TEST_NAME} - EXTRA_LINK_LIBS - ${ARROW_DATASET_TEST_LINK_LIBS} - PREFIX - ${PREFIX} - ${ARG_UNPARSED_ARGUMENTS}) + add_arrow_test(${REL_TEST_NAME} PREFIX ${PREFIX} ${ARG_UNPARSED_ARGUMENTS}) endfunction() add_arrow_test(feather_test) add_arrow_ipc_test(json_simple_test) -add_arrow_ipc_test(read_write_test) +add_arrow_ipc_test(read_write_test EXTRA_LINK_LIBS arrow::flatbuffers) add_arrow_ipc_test(tensor_test) # Headers: top level diff --git a/cpp/src/arrow/ipc/feather_test.cc b/cpp/src/arrow/ipc/feather_test.cc index 80e441fe2b670..ba3f4d828c397 100644 --- a/cpp/src/arrow/ipc/feather_test.cc +++ b/cpp/src/arrow/ipc/feather_test.cc @@ -35,6 +35,7 @@ #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/util/compression.h" +#include "arrow/util/config.h" namespace arrow { diff --git a/cpp/src/arrow/ipc/read_write_benchmark.cc b/cpp/src/arrow/ipc/read_write_benchmark.cc index ed7e6957df199..defe9790678c0 100644 --- a/cpp/src/arrow/ipc/read_write_benchmark.cc +++ b/cpp/src/arrow/ipc/read_write_benchmark.cc @@ -30,6 +30,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/type.h" +#include "arrow/util/config.h" #include "arrow/util/io_util.h" namespace arrow { diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h index 4e0ee3dfc8b44..aefb59f3136e4 100644 --- a/cpp/src/arrow/ipc/writer.h +++ b/cpp/src/arrow/ipc/writer.h @@ -113,8 +113,8 @@ class ARROW_EXPORT RecordBatchWriter { /// \brief Write Table with a particular chunksize /// \param[in] table table to write - /// \param[in] max_chunksize maximum length of table chunks. To indicate - /// that no maximum should be enforced, pass -1. + /// \param[in] max_chunksize maximum number of rows for table chunks. To + /// indicate that no maximum should be enforced, pass -1. 
/// \return Status virtual Status WriteTable(const Table& table, int64_t max_chunksize); diff --git a/cpp/src/arrow/json/CMakeLists.txt b/cpp/src/arrow/json/CMakeLists.txt index f09b15ce51c20..95b299d8f0c33 100644 --- a/cpp/src/arrow/json/CMakeLists.txt +++ b/cpp/src/arrow/json/CMakeLists.txt @@ -23,9 +23,15 @@ add_arrow_test(test parser_test.cc reader_test.cc PREFIX - "arrow-json") + "arrow-json" + EXTRA_LINK_LIBS + RapidJSON) -add_arrow_benchmark(parser_benchmark PREFIX "arrow-json") +add_arrow_benchmark(parser_benchmark + PREFIX + "arrow-json" + EXTRA_LINK_LIBS + RapidJSON) arrow_install_all_headers("arrow/json") # pkg-config support diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index 551880f237586..a7508430c132b 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -251,9 +251,9 @@ class ARROW_EXPORT TableBatchReader : public RecordBatchReader { Status ReadNext(std::shared_ptr* out) override; - /// \brief Set the desired maximum chunk size of record batches + /// \brief Set the desired maximum number of rows for record batches /// - /// The actual chunk size of each record batch may be smaller, depending + /// The actual number of rows in each record batch may be smaller, depending /// on actual chunking characteristics of each table column. void set_chunksize(int64_t chunksize); diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc index 5ef1820d5b581..37865948882da 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -232,21 +232,12 @@ void AssertBufferEqual(const Buffer& buffer, const Buffer& expected) { ASSERT_TRUE(buffer.Equals(expected)); } -template -std::string ToStringWithMetadata(const T& t, bool show_metadata) { - return t.ToString(show_metadata); -} - -std::string ToStringWithMetadata(const DataType& t, bool show_metadata) { - return t.ToString(); -} - template void AssertFingerprintablesEqual(const T& left, const T& right, bool check_metadata, const char* types_plural) { ASSERT_TRUE(left.Equals(right, check_metadata)) - << types_plural << " '" << ToStringWithMetadata(left, check_metadata) << "' and '" - << ToStringWithMetadata(right, check_metadata) << "' should have compared equal"; + << types_plural << " '" << left.ToString(check_metadata) << "' and '" + << right.ToString(check_metadata) << "' should have compared equal"; auto lfp = left.fingerprint(); auto rfp = right.fingerprint(); // Note: all types tested in this file should implement fingerprinting, @@ -256,9 +247,8 @@ void AssertFingerprintablesEqual(const T& left, const T& right, bool check_metad rfp += right.metadata_fingerprint(); } ASSERT_EQ(lfp, rfp) << "Fingerprints for " << types_plural << " '" - << ToStringWithMetadata(left, check_metadata) << "' and '" - << ToStringWithMetadata(right, check_metadata) - << "' should have compared equal"; + << left.ToString(check_metadata) << "' and '" + << right.ToString(check_metadata) << "' should have compared equal"; } template @@ -274,8 +264,8 @@ template void AssertFingerprintablesNotEqual(const T& left, const T& right, bool check_metadata, const char* types_plural) { ASSERT_FALSE(left.Equals(right, check_metadata)) - << types_plural << " '" << ToStringWithMetadata(left, check_metadata) << "' and '" - << ToStringWithMetadata(right, check_metadata) << "' should have compared unequal"; + << types_plural << " '" << left.ToString(check_metadata) << "' and '" + << right.ToString(check_metadata) << "' should have compared unequal"; auto lfp = left.fingerprint(); auto rfp = 
right.fingerprint(); // Note: all types tested in this file should implement fingerprinting, @@ -286,8 +276,8 @@ void AssertFingerprintablesNotEqual(const T& left, const T& right, bool check_me rfp += right.metadata_fingerprint(); } ASSERT_NE(lfp, rfp) << "Fingerprints for " << types_plural << " '" - << ToStringWithMetadata(left, check_metadata) << "' and '" - << ToStringWithMetadata(right, check_metadata) + << left.ToString(check_metadata) << "' and '" + << right.ToString(check_metadata) << "' should have compared unequal"; } } diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 62d2d61598dc8..edf8f0496628c 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -874,7 +874,7 @@ bool Field::IsCompatibleWith(const std::shared_ptr& other) const { std::string Field::ToString(bool show_metadata) const { std::stringstream ss; - ss << name_ << ": " << type_->ToString(); + ss << name_ << ": " << type_->ToString(show_metadata); if (!nullable_) { ss << " not null"; } @@ -919,14 +919,15 @@ std::ostream& operator<<(std::ostream& os, const TypeHolder& type) { // ---------------------------------------------------------------------- // TypeHolder -std::string TypeHolder::ToString(const std::vector& types) { +std::string TypeHolder::ToString(const std::vector& types, + bool show_metadata) { std::stringstream ss; ss << "("; for (size_t i = 0; i < types.size(); ++i) { if (i > 0) { ss << ", "; } - ss << types[i].type->ToString(); + ss << types[i].type->ToString(show_metadata); } ss << ")"; return ss.str(); @@ -984,27 +985,27 @@ BaseBinaryType::~BaseBinaryType() {} BaseListType::~BaseListType() {} -std::string ListType::ToString() const { +std::string ListType::ToString(bool show_metadata) const { std::stringstream s; - s << "list<" << value_field()->ToString() << ">"; + s << "list<" << value_field()->ToString(show_metadata) << ">"; return s.str(); } -std::string LargeListType::ToString() const { +std::string LargeListType::ToString(bool show_metadata) const { std::stringstream s; - s << "large_list<" << value_field()->ToString() << ">"; + s << "large_list<" << value_field()->ToString(show_metadata) << ">"; return s.str(); } -std::string ListViewType::ToString() const { +std::string ListViewType::ToString(bool show_metadata) const { std::stringstream s; - s << "list_view<" << value_field()->ToString() << ">"; + s << "list_view<" << value_field()->ToString(show_metadata) << ">"; return s.str(); } -std::string LargeListViewType::ToString() const { +std::string LargeListViewType::ToString(bool show_metadata) const { std::stringstream s; - s << "large_list_view<" << value_field()->ToString() << ">"; + s << "large_list_view<" << value_field()->ToString(show_metadata) << ">"; return s.str(); } @@ -1047,7 +1048,7 @@ Result> MapType::Make(std::shared_ptr value_fie return std::make_shared(std::move(value_field), keys_sorted); } -std::string MapType::ToString() const { +std::string MapType::ToString(bool show_metadata) const { std::stringstream s; const auto print_field_name = [](std::ostream& os, const Field& field, @@ -1058,7 +1059,7 @@ std::string MapType::ToString() const { }; const auto print_field = [&](std::ostream& os, const Field& field, const char* std_name) { - os << field.type()->ToString(); + os << field.type()->ToString(show_metadata); print_field_name(os, field, std_name); }; @@ -1074,23 +1075,24 @@ std::string MapType::ToString() const { return s.str(); } -std::string FixedSizeListType::ToString() const { +std::string FixedSizeListType::ToString(bool show_metadata) const { 
std::stringstream s; - s << "fixed_size_list<" << value_field()->ToString() << ">[" << list_size_ << "]"; + s << "fixed_size_list<" << value_field()->ToString(show_metadata) << ">[" << list_size_ + << "]"; return s.str(); } -std::string BinaryType::ToString() const { return "binary"; } +std::string BinaryType::ToString(bool show_metadata) const { return "binary"; } -std::string BinaryViewType::ToString() const { return "binary_view"; } +std::string BinaryViewType::ToString(bool show_metadata) const { return "binary_view"; } -std::string LargeBinaryType::ToString() const { return "large_binary"; } +std::string LargeBinaryType::ToString(bool show_metadata) const { return "large_binary"; } -std::string StringType::ToString() const { return "string"; } +std::string StringType::ToString(bool show_metadata) const { return "string"; } -std::string StringViewType::ToString() const { return "string_view"; } +std::string StringViewType::ToString(bool show_metadata) const { return "string_view"; } -std::string LargeStringType::ToString() const { return "large_string"; } +std::string LargeStringType::ToString(bool show_metadata) const { return "large_string"; } int FixedSizeBinaryType::bit_width() const { return CHAR_BIT * byte_width(); } @@ -1105,7 +1107,7 @@ Result> FixedSizeBinaryType::Make(int32_t byte_width) return std::make_shared(byte_width); } -std::string FixedSizeBinaryType::ToString() const { +std::string FixedSizeBinaryType::ToString(bool show_metadata) const { std::stringstream ss; ss << "fixed_size_binary[" << byte_width_ << "]"; return ss.str(); @@ -1122,9 +1124,13 @@ Date32Type::Date32Type() : DateType(Type::DATE32) {} Date64Type::Date64Type() : DateType(Type::DATE64) {} -std::string Date64Type::ToString() const { return std::string("date64[ms]"); } +std::string Date64Type::ToString(bool show_metadata) const { + return std::string("date64[ms]"); +} -std::string Date32Type::ToString() const { return std::string("date32[day]"); } +std::string Date32Type::ToString(bool show_metadata) const { + return std::string("date32[day]"); +} // ---------------------------------------------------------------------- // Time types @@ -1137,7 +1143,7 @@ Time32Type::Time32Type(TimeUnit::type unit) : TimeType(Type::TIME32, unit) { << "Must be seconds or milliseconds"; } -std::string Time32Type::ToString() const { +std::string Time32Type::ToString(bool show_metadata) const { std::stringstream ss; ss << "time32[" << this->unit_ << "]"; return ss.str(); @@ -1148,7 +1154,7 @@ Time64Type::Time64Type(TimeUnit::type unit) : TimeType(Type::TIME64, unit) { << "Must be microseconds or nanoseconds"; } -std::string Time64Type::ToString() const { +std::string Time64Type::ToString(bool show_metadata) const { std::stringstream ss; ss << "time64[" << this->unit_ << "]"; return ss.str(); @@ -1175,7 +1181,7 @@ std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) { // ---------------------------------------------------------------------- // Timestamp types -std::string TimestampType::ToString() const { +std::string TimestampType::ToString(bool show_metadata) const { std::stringstream ss; ss << "timestamp[" << this->unit_; if (this->timezone_.size() > 0) { @@ -1186,7 +1192,7 @@ std::string TimestampType::ToString() const { } // Duration types -std::string DurationType::ToString() const { +std::string DurationType::ToString(bool show_metadata) const { std::stringstream ss; ss << "duration[" << this->unit_ << "]"; return ss.str(); @@ -1245,7 +1251,7 @@ uint8_t UnionType::max_type_code() const { : 
*std::max_element(type_codes_.begin(), type_codes_.end()); } -std::string UnionType::ToString() const { +std::string UnionType::ToString(bool show_metadata) const { std::stringstream s; s << name() << "<"; @@ -1254,7 +1260,7 @@ std::string UnionType::ToString() const { if (i) { s << ", "; } - s << children_[i]->ToString() << "=" << static_cast<int>(type_codes_[i]); + s << children_[i]->ToString(show_metadata) << "=" << static_cast<int>(type_codes_[i]); } s << ">"; return s.str(); @@ -1291,10 +1297,10 @@ RunEndEncodedType::RunEndEncodedType(std::shared_ptr<DataType> run_end_type, RunEndEncodedType::~RunEndEncodedType() = default; -std::string RunEndEncodedType::ToString() const { +std::string RunEndEncodedType::ToString(bool show_metadata) const { std::stringstream s; - s << name() << "<run_ends: " << run_end_type()->ToString() - << ", values: " << value_type()->ToString() << ">"; + s << name() << "<run_ends: " << run_end_type()->ToString(show_metadata) + << ", values: " << value_type()->ToString(show_metadata) << ">"; return s.str(); } @@ -1350,7 +1356,7 @@ StructType::StructType(const FieldVector& fields) StructType::~StructType() {} -std::string StructType::ToString() const { +std::string StructType::ToString(bool show_metadata) const { std::stringstream s; s << "struct<"; for (int i = 0; i < this->num_fields(); ++i) { @@ -1358,7 +1364,7 @@ std::string StructType::ToString() const { s << ", "; } std::shared_ptr<Field> field = this->field(i); - s << field->ToString(); + s << field->ToString(show_metadata); } s << ">"; return s.str(); @@ -1523,17 +1529,18 @@ DataTypeLayout DictionaryType::layout() const { return layout; } -std::string DictionaryType::ToString() const { +std::string DictionaryType::ToString(bool show_metadata) const { std::stringstream ss; - ss << this->name() << "<values=" << value_type_->ToString() - << ", indices=" << index_type_->ToString() << ", ordered=" << ordered_ << ">"; + ss << this->name() << "<values=" << value_type_->ToString(show_metadata) + << ", indices=" << index_type_->ToString(show_metadata) << ", ordered=" << ordered_ + << ">"; return ss.str(); } // ---------------------------------------------------------------------- // Null type -std::string NullType::ToString() const { return name(); } +std::string NullType::ToString(bool show_metadata) const { return name(); } // ---------------------------------------------------------------------- // FieldPath @@ -3304,13 +3311,13 @@ std::shared_ptr<DataType> decimal256(int32_t precision, int32_t scale) { return std::make_shared<Decimal256Type>(precision, scale); } -std::string Decimal128Type::ToString() const { +std::string Decimal128Type::ToString(bool show_metadata) const { std::stringstream s; s << "decimal128(" << precision_ << ", " << scale_ << ")"; return s.str(); } -std::string Decimal256Type::ToString() const { +std::string Decimal256Type::ToString(bool show_metadata) const { std::stringstream s; s << "decimal256(" << precision_ << ", " << scale_ << ")"; return s.str(); diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 5b1331ab66919..3f651741d3e49 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -162,7 +162,7 @@ class ARROW_EXPORT DataType : public std::enable_shared_from_this<DataType>, Status Accept(TypeVisitor* visitor) const; /// \brief A string representation of the type, including any children - virtual std::string ToString() const = 0; + virtual std::string ToString(bool show_metadata = false) const = 0; /// \brief Return hash value (excluding metadata in child fields) size_t Hash() const; @@ -266,11 +266,11 @@ struct ARROW_EXPORT TypeHolder { bool operator!=(const TypeHolder& other) const { return !(*this == other); } - std::string ToString()
const { - return this->type ? this->type->ToString() : ""; + std::string ToString(bool show_metadata = false) const { + return this->type ? this->type->ToString(show_metadata) : ""; } - static std::string ToString(const std::vector&); + static std::string ToString(const std::vector&, bool show_metadata = false); static std::vector FromTypes( const std::vector>& types); @@ -565,7 +565,7 @@ class ARROW_EXPORT CTypeImpl : public BASE { std::string name() const override { return DERIVED::type_name(); } - std::string ToString() const override { return this->name(); } + std::string ToString(bool show_metadata = false) const override { return this->name(); } }; template @@ -587,7 +587,7 @@ class ARROW_EXPORT NullType : public DataType { NullType() : DataType(Type::NA) {} - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; DataTypeLayout layout() const override { return DataTypeLayout({DataTypeLayout::AlwaysNull()}); @@ -769,7 +769,7 @@ class ARROW_EXPORT BinaryType : public BaseBinaryType { DataTypeLayout::VariableWidth()}); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "binary"; } protected: @@ -866,7 +866,7 @@ class ARROW_EXPORT BinaryViewType : public DataType { DataTypeLayout::VariableWidth()); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "binary_view"; } protected: @@ -894,7 +894,7 @@ class ARROW_EXPORT LargeBinaryType : public BaseBinaryType { DataTypeLayout::VariableWidth()}); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "large_binary"; } protected: @@ -915,7 +915,7 @@ class ARROW_EXPORT StringType : public BinaryType { StringType() : BinaryType(Type::STRING) {} - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "utf8"; } protected: @@ -933,7 +933,7 @@ class ARROW_EXPORT StringViewType : public BinaryViewType { StringViewType() : BinaryViewType(Type::STRING_VIEW) {} - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "utf8_view"; } protected: @@ -951,7 +951,7 @@ class ARROW_EXPORT LargeStringType : public LargeBinaryType { LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {} - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "large_utf8"; } protected: @@ -971,7 +971,7 @@ class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public Parametri explicit FixedSizeBinaryType(int32_t byte_width, Type::type override_type_id) : FixedWidthType(override_type_id), byte_width_(byte_width) {} - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "fixed_size_binary"; } DataTypeLayout layout() const override { @@ -1050,7 +1050,7 @@ class ARROW_EXPORT Decimal128Type : public DecimalType { /// Decimal128Type constructor that returns an error on invalid input. 
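Taken together, the plumbing above means any `DataType::ToString` call can now surface field-level metadata on request, while the default output is unchanged. A minimal sketch of the observable difference (the metadata key and value are illustrative):

```cpp
#include <iostream>
#include "arrow/type.h"
#include "arrow/util/key_value_metadata.h"

void PrintWithMetadata() {
  auto md = arrow::key_value_metadata({"foo"}, {"foo value"});
  auto ty = arrow::list(arrow::field("item", arrow::utf8(),
                                     /*nullable=*/true, md));
  // Default: metadata is omitted, matching the old behavior.
  std::cout << ty->ToString() << std::endl;
  // With show_metadata=true the item field's key/value metadata is
  // included in the rendering as well.
  std::cout << ty->ToString(/*show_metadata=*/true) << std::endl;
}
```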
static Result> Make(int32_t precision, int32_t scale); - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "decimal128"; } static constexpr int32_t kMinPrecision = 1; @@ -1083,7 +1083,7 @@ class ARROW_EXPORT Decimal256Type : public DecimalType { /// Decimal256Type constructor that returns an error on invalid input. static Result> Make(int32_t precision, int32_t scale); - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "decimal256"; } static constexpr int32_t kMinPrecision = 1; @@ -1134,7 +1134,7 @@ class ARROW_EXPORT ListType : public BaseListType { {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))}); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "list"; } @@ -1166,7 +1166,7 @@ class ARROW_EXPORT LargeListType : public BaseListType { {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))}); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "large_list"; } @@ -1197,7 +1197,7 @@ class ARROW_EXPORT ListViewType : public BaseListType { DataTypeLayout::FixedWidth(sizeof(offset_type))}); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "list_view"; } @@ -1231,7 +1231,7 @@ class ARROW_EXPORT LargeListViewType : public BaseListType { DataTypeLayout::FixedWidth(sizeof(offset_type))}); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "large_list_view"; } @@ -1273,7 +1273,7 @@ class ARROW_EXPORT MapType : public ListType { std::shared_ptr item_field() const { return value_type()->field(1); } std::shared_ptr item_type() const { return item_field()->type(); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "map"; } @@ -1308,7 +1308,7 @@ class ARROW_EXPORT FixedSizeListType : public BaseListType { return DataTypeLayout({DataTypeLayout::Bitmap()}); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "fixed_size_list"; } @@ -1335,7 +1335,7 @@ class ARROW_EXPORT StructType : public NestedType { return DataTypeLayout({DataTypeLayout::Bitmap()}); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "struct"; } /// Returns null if name not found @@ -1385,7 +1385,7 @@ class ARROW_EXPORT UnionType : public NestedType { DataTypeLayout layout() const override; - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; /// The array of logical type ids. 
/// @@ -1488,7 +1488,7 @@ class ARROW_EXPORT RunEndEncodedType : public NestedType { const std::shared_ptr& run_end_type() const { return fields()[0]->type(); } const std::shared_ptr& value_type() const { return fields()[1]->type(); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "run_end_encoded"; } @@ -1544,7 +1544,7 @@ class ARROW_EXPORT Date32Type : public DateType { int bit_width() const override { return static_cast(sizeof(c_type) * CHAR_BIT); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "date32"; } DateUnit unit() const override { return UNIT; } @@ -1567,7 +1567,7 @@ class ARROW_EXPORT Date64Type : public DateType { int bit_width() const override { return static_cast(sizeof(c_type) * CHAR_BIT); } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "date64"; } DateUnit unit() const override { return UNIT; } @@ -1605,7 +1605,7 @@ class ARROW_EXPORT Time32Type : public TimeType { explicit Time32Type(TimeUnit::type unit = TimeUnit::MILLI); - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "time32"; } }; @@ -1624,7 +1624,7 @@ class ARROW_EXPORT Time64Type : public TimeType { explicit Time64Type(TimeUnit::type unit = TimeUnit::NANO); - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "time64"; } }; @@ -1679,7 +1679,7 @@ class ARROW_EXPORT TimestampType : public TemporalType, public ParametricType { explicit TimestampType(TimeUnit::type unit, const std::string& timezone) : TemporalType(Type::TIMESTAMP), unit_(unit), timezone_(timezone) {} - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "timestamp"; } TimeUnit::type unit() const { return unit_; } @@ -1723,7 +1723,7 @@ class ARROW_EXPORT MonthIntervalType : public IntervalType { MonthIntervalType() : IntervalType(type_id) {} - std::string ToString() const override { return name(); } + std::string ToString(bool show_metadata = false) const override { return name(); } std::string name() const override { return "month_interval"; } }; @@ -1759,7 +1759,7 @@ class ARROW_EXPORT DayTimeIntervalType : public IntervalType { int bit_width() const override { return static_cast(sizeof(c_type) * CHAR_BIT); } - std::string ToString() const override { return name(); } + std::string ToString(bool show_metadata = false) const override { return name(); } std::string name() const override { return "day_time_interval"; } }; @@ -1799,7 +1799,7 @@ class ARROW_EXPORT MonthDayNanoIntervalType : public IntervalType { int bit_width() const override { return static_cast(sizeof(c_type) * CHAR_BIT); } - std::string ToString() const override { return name(); } + std::string ToString(bool show_metadata = false) const override { return name(); } std::string name() const override { return "month_day_nano_interval"; } }; @@ -1823,7 +1823,7 @@ class ARROW_EXPORT DurationType : public TemporalType, public ParametricType { explicit DurationType(TimeUnit::type unit = TimeUnit::MILLI) : TemporalType(Type::DURATION), unit_(unit) {} - std::string ToString() const override; + 
std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "duration"; } TimeUnit::type unit() const { return unit_; } @@ -1857,7 +1857,7 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType { const std::shared_ptr& index_type, const std::shared_ptr& value_type, bool ordered = false); - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; std::string name() const override { return "dictionary"; } int bit_width() const override; diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index 22913f77fbfc1..df484a8fc2c59 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -1893,9 +1894,13 @@ TEST(TestListViewType, Equals) { AssertTypeEqual(list_view_type, list_view_type_named); ASSERT_FALSE(list_view_type.Equals(list_view_type_named, /*check_metadata=*/true)); + ASSERT_NE(list_view_type.ToString(), list_view_type_named.ToString()); } -TEST(TestListType, Metadata) { +using ListListTypeFactory = + std::function(std::shared_ptr)>; + +void CheckListListTypeMetadata(ListListTypeFactory list_type_factory) { auto md1 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"}); auto md2 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"}); auto md3 = key_value_metadata({"foo"}, {"foo value"}); @@ -1906,83 +1911,49 @@ TEST(TestListType, Metadata) { auto f4 = field("item", utf8()); auto f5 = field("item", utf8(), /*nullable =*/false, md1); - auto t1 = list(f1); - auto t2 = list(f2); - auto t3 = list(f3); - auto t4 = list(f4); - auto t5 = list(f5); + auto t1 = list_type_factory(f1); + auto t2 = list_type_factory(f2); + auto t3 = list_type_factory(f3); + auto t4 = list_type_factory(f4); + auto t5 = list_type_factory(f5); AssertTypeEqual(*t1, *t2); AssertTypeEqual(*t1, *t2, /*check_metadata =*/false); + ASSERT_EQ(t1->ToString(/*show_metadata=*/true), t2->ToString(/*show_metadata=*/true)); AssertTypeEqual(*t1, *t3); AssertTypeNotEqual(*t1, *t3, /*check_metadata =*/true); + ASSERT_EQ(t1->ToString(/*show_metadata=*/false), t3->ToString(/*show_metadata=*/false)); + ASSERT_NE(t1->ToString(/*show_metadata=*/true), t3->ToString(/*show_metadata=*/true)); AssertTypeEqual(*t1, *t4); AssertTypeNotEqual(*t1, *t4, /*check_metadata =*/true); + ASSERT_EQ(t1->ToString(/*show_metadata=*/false), t4->ToString(/*show_metadata=*/false)); + ASSERT_NE(t1->ToString(/*show_metadata=*/true), t4->ToString(/*show_metadata=*/true)); AssertTypeNotEqual(*t1, *t5); AssertTypeNotEqual(*t1, *t5, /*check_metadata =*/true); + ASSERT_NE(t1->ToString(/*show_metadata=*/false), t5->ToString(/*show_metadata=*/false)); + ASSERT_NE(t1->ToString(/*show_metadata=*/true), t5->ToString(/*show_metadata=*/true)); } -TEST(TestListViewType, Metadata) { - auto md1 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"}); - auto md2 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"}); - auto md3 = key_value_metadata({"foo"}, {"foo value"}); - - auto f1 = field("item", utf8(), /*nullable =*/true, md1); - auto f2 = field("item", utf8(), /*nullable =*/true, md2); - auto f3 = field("item", utf8(), /*nullable =*/true, md3); - auto f4 = field("item", utf8()); - auto f5 = field("item", utf8(), /*nullable =*/false, md1); - - auto t1 = list_view(f1); - auto t2 = list_view(f2); - auto t3 = list_view(f3); - auto t4 = list_view(f4); - auto t5 = list_view(f5); - - AssertTypeEqual(*t1, *t2); - 
AssertTypeEqual(*t1, *t2, /*check_metadata =*/false); - - AssertTypeEqual(*t1, *t3); - AssertTypeNotEqual(*t1, *t3, /*check_metadata =*/true); +TEST(TestListType, Metadata) { + CheckListListTypeMetadata([](std::shared_ptr field) { return list(field); }); +} - AssertTypeEqual(*t1, *t4); - AssertTypeNotEqual(*t1, *t4, /*check_metadata =*/true); +TEST(TestLargeListType, Metadata) { + CheckListListTypeMetadata( + [](std::shared_ptr field) { return large_list(field); }); +} - AssertTypeNotEqual(*t1, *t5); - AssertTypeNotEqual(*t1, *t5, /*check_metadata =*/true); +TEST(TestListViewType, Metadata) { + CheckListListTypeMetadata( + [](std::shared_ptr field) { return list_view(field); }); } TEST(TestLargeListViewType, Metadata) { - auto md1 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"}); - auto md2 = key_value_metadata({"foo", "bar"}, {"foo value", "bar value"}); - auto md3 = key_value_metadata({"foo"}, {"foo value"}); - - auto f1 = field("item", utf8(), /*nullable =*/true, md1); - auto f2 = field("item", utf8(), /*nullable =*/true, md2); - auto f3 = field("item", utf8(), /*nullable =*/true, md3); - auto f4 = field("item", utf8()); - auto f5 = field("item", utf8(), /*nullable =*/false, md1); - - auto t1 = large_list_view(f1); - auto t2 = large_list_view(f2); - auto t3 = large_list_view(f3); - auto t4 = large_list_view(f4); - auto t5 = large_list_view(f5); - - AssertTypeEqual(*t1, *t2); - AssertTypeEqual(*t1, *t2, /*check_metadata =*/false); - - AssertTypeEqual(*t1, *t3); - AssertTypeNotEqual(*t1, *t3, /*check_metadata =*/true); - - AssertTypeEqual(*t1, *t4); - AssertTypeNotEqual(*t1, *t4, /*check_metadata =*/true); - - AssertTypeNotEqual(*t1, *t5); - AssertTypeNotEqual(*t1, *t5, /*check_metadata =*/true); + CheckListListTypeMetadata( + [](std::shared_ptr field) { return large_list_view(field); }); } TEST(TestNestedType, Equals) { @@ -2124,6 +2095,12 @@ TEST(TestStructType, TestFieldsDifferOnlyInMetadata) { AssertTypeEqual(s0, s1); AssertTypeNotEqual(s0, s1, /* check_metadata = */ true); + ASSERT_NE(s0.ToString(), s1.ToString(/*show_metadata=*/true)); + + std::string expected = R"(struct)"; + ASSERT_EQ(s1.ToString(/*show_metadata=*/true), expected); ASSERT_EQ(s0.fingerprint(), s1.fingerprint()); ASSERT_NE(s0.metadata_fingerprint(), s1.metadata_fingerprint()); diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index badf8a75078ed..e26efba28594b 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -38,6 +38,13 @@ else() set(IO_UTIL_TEST_SOURCES io_util_test.cc) endif() +set(ARROW_UTILITY_TEST_LINK_LIBS Boost::headers) +if(ARROW_USE_XSIMD) + list(APPEND ARROW_UTILITY_TEST_LINK_LIBS ${ARROW_XSIMD}) +endif() +if(ARROW_WITH_OPENTELEMETRY) + list(APPEND ARROW_UTILITY_TEST_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) +endif() add_arrow_test(utility-test SOURCES align_util_test.cc @@ -73,7 +80,9 @@ add_arrow_test(utility-test trie_test.cc uri_test.cc utf8_util_test.cc - value_parsing_test.cc) + value_parsing_test.cc + EXTRA_LINK_LIBS + ${ARROW_UTILITY_TEST_LINK_LIBS}) add_arrow_test(async-utility-test SOURCES @@ -96,7 +105,11 @@ add_arrow_test(threading-utility-test test_common.cc thread_pool_test.cc) -add_arrow_test(crc32-test SOURCES crc32_test.cc) +add_arrow_test(crc32-test + SOURCES + crc32_test.cc + EXTRA_LINK_LIBS + Boost::headers) add_arrow_benchmark(bit_block_counter_benchmark) add_arrow_benchmark(bit_util_benchmark) @@ -113,5 +126,10 @@ add_arrow_benchmark(small_vector_benchmark) add_arrow_benchmark(tdigest_benchmark) 
add_arrow_benchmark(thread_pool_benchmark) add_arrow_benchmark(trie_benchmark) -add_arrow_benchmark(utf8_util_benchmark) +set(ARROW_BENCHMARK_UTF8_UTIL_LINK_LIBS) +if(ARROW_USE_XSIMD) + list(APPEND ARROW_BENCHMARK_UTF8_UTIL_LINK_LIBS ${ARROW_XSIMD}) +endif() +add_arrow_benchmark(utf8_util_benchmark EXTRA_LINK_LIBS + ${ARROW_BENCHMARK_UTF8_UTIL_LINK_LIBS}) add_arrow_benchmark(value_parsing_benchmark) diff --git a/cpp/src/arrow/util/benchmark_main.cc b/cpp/src/arrow/util/benchmark_main.cc deleted file mode 100644 index c9739af03fb53..0000000000000 --- a/cpp/src/arrow/util/benchmark_main.cc +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "benchmark/benchmark.h" - -int main(int argc, char** argv) { - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - return 0; -} diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc index 5ad17e993f153..b63aec0aae8f9 100644 --- a/cpp/src/arrow/util/compression.cc +++ b/cpp/src/arrow/util/compression.cc @@ -24,6 +24,7 @@ #include "arrow/result.h" #include "arrow/status.h" #include "arrow/util/compression_internal.h" +#include "arrow/util/config.h" #include "arrow/util/logging.h" namespace arrow { diff --git a/cpp/src/arrow/util/compression_benchmark.cc b/cpp/src/arrow/util/compression_benchmark.cc index cc04eb4634851..0b9727cff9041 100644 --- a/cpp/src/arrow/util/compression_benchmark.cc +++ b/cpp/src/arrow/util/compression_benchmark.cc @@ -27,6 +27,7 @@ #include "arrow/result.h" #include "arrow/util/compression.h" +#include "arrow/util/config.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/util/compression_test.cc b/cpp/src/arrow/util/compression_test.cc index eeeedce17764f..eb2da98d5110d 100644 --- a/cpp/src/arrow/util/compression_test.cc +++ b/cpp/src/arrow/util/compression_test.cc @@ -30,6 +30,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" #include "arrow/util/compression.h" +#include "arrow/util/config.h" namespace arrow { namespace util { @@ -729,7 +730,7 @@ INSTANTIATE_TEST_SUITE_P(TestLZ4Frame, CodecTest, INSTANTIATE_TEST_SUITE_P(TestBrotli, CodecTest, ::testing::Values(Compression::BROTLI)); #endif -#if ARROW_WITH_BZ2 +#ifdef ARROW_WITH_BZ2 INSTANTIATE_TEST_SUITE_P(TestBZ2, CodecTest, ::testing::Values(Compression::BZ2)); #endif diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index 6c8c31ffb856f..fb42a53139f44 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -47,15 +47,26 @@ #cmakedefine ARROW_JEMALLOC #cmakedefine ARROW_JEMALLOC_VENDORED #cmakedefine ARROW_JSON +#cmakedefine ARROW_MIMALLOC #cmakedefine ARROW_ORC #cmakedefine ARROW_PARQUET #cmakedefine 
ARROW_SUBSTRAIT #cmakedefine ARROW_ENABLE_THREADING #cmakedefine ARROW_GCS +#cmakedefine ARROW_HDFS #cmakedefine ARROW_S3 +#cmakedefine ARROW_USE_GLOG #cmakedefine ARROW_USE_NATIVE_INT128 +#cmakedefine ARROW_WITH_BROTLI +#cmakedefine ARROW_WITH_BZ2 +#cmakedefine ARROW_WITH_LZ4 #cmakedefine ARROW_WITH_MUSL #cmakedefine ARROW_WITH_OPENTELEMETRY +#cmakedefine ARROW_WITH_RE2 +#cmakedefine ARROW_WITH_SNAPPY #cmakedefine ARROW_WITH_UCX +#cmakedefine ARROW_WITH_UTF8PROC +#cmakedefine ARROW_WITH_ZLIB +#cmakedefine ARROW_WITH_ZSTD #cmakedefine PARQUET_REQUIRE_ENCRYPTION diff --git a/cpp/src/arrow/util/decimal_benchmark.cc b/cpp/src/arrow/util/decimal_benchmark.cc index 5ec7f8df870f6..d505532d71da1 100644 --- a/cpp/src/arrow/util/decimal_benchmark.cc +++ b/cpp/src/arrow/util/decimal_benchmark.cc @@ -131,7 +131,7 @@ static void BinaryMathOpAggregate( for (auto _ : state) { BasicDecimal128 result; - for (int x = 0; x < 100; x++) { + for (int x = 0; x < kValueSize; x++) { result += v[x]; } benchmark::DoNotOptimize(result); diff --git a/cpp/src/arrow/util/logging.cc b/cpp/src/arrow/util/logging.cc index 9c68982a3d59f..25c336a6d2111 100644 --- a/cpp/src/arrow/util/logging.cc +++ b/cpp/src/arrow/util/logging.cc @@ -17,6 +17,8 @@ #include "arrow/util/logging.h" +#include "arrow/util/config.h" + #ifdef ARROW_WITH_BACKTRACE #include <execinfo.h> #endif @@ -28,7 +30,7 @@ #include #include -#include "glog/logging.h" +#include <glog/logging.h> // Restore our versions of DCHECK and friends, as GLog defines its own #undef DCHECK @@ -114,7 +116,7 @@ static std::unique_ptr<std::string> log_dir_; #ifdef ARROW_USE_GLOG // Glog's severity map. -static int GetMappedSeverity(ArrowLogLevel severity) { +static google::LogSeverity GetMappedSeverity(ArrowLogLevel severity) { switch (severity) { case ArrowLogLevel::ARROW_DEBUG: return google::GLOG_INFO; @@ -146,7 +148,7 @@ void ArrowLog::StartArrowLog(const std::string& app_name, app_name_.reset(new std::string(app_name)); log_dir_.reset(new std::string(log_dir)); #ifdef ARROW_USE_GLOG - int mapped_severity_threshold = GetMappedSeverity(severity_threshold_); + google::LogSeverity mapped_severity_threshold = GetMappedSeverity(severity_threshold_); google::SetStderrLogging(mapped_severity_threshold); // Enable log file if log_dir is not empty.
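The `#if ARROW_WITH_BZ2` to `#ifdef` fix in compression_test.cc above follows directly from how these `#cmakedefine` entries expand: when a feature is enabled, the generated header contains a bare, valueless `#define`, so the macro must be tested for definedness rather than truthiness. A short sketch of the pattern consumers of `arrow/util/config.h` are expected to follow:

```cpp
#include "arrow/util/config.h"

// Correct: ARROW_WITH_BZ2 is either absent or defined with no value,
// so only a definedness test is meaningful.
#ifdef ARROW_WITH_BZ2
// ... BZ2-dependent code ...
#endif

// Incorrect: when the feature is ON, `#if ARROW_WITH_BZ2` leaves the
// preprocessor with an empty conditional expression and fails to compile.
```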
if (!log_dir.empty()) { @@ -171,7 +173,7 @@ void ArrowLog::StartArrowLog(const std::string& app_name, google::SetLogFilenameExtension(app_name_without_path.c_str()); for (int i = static_cast(severity_threshold_); i <= static_cast(ArrowLogLevel::ARROW_FATAL); ++i) { - int level = GetMappedSeverity(static_cast(i)); + google::LogSeverity level = GetMappedSeverity(static_cast(i)); google::SetLogDestination(level, dir_ends_with_slash.c_str()); } } diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 9352ac5c4a938..0f28b0da82fed 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -92,20 +92,24 @@ set(SRC_FILES random_generator_holder.cc ${GANDIVA_PRECOMPILED_CC_PATH}) -set(GANDIVA_OPENSSL_LIBS OpenSSL::Crypto OpenSSL::SSL) +set(GANDIVA_OPENSSL_LIBS ${ARROW_OPENSSL_LIBS}) if(WIN32 AND NOT CMAKE_VERSION VERSION_LESS 3.18) - list(APPEND GANDIVA_OPENSSL_TARGETS OpenSSL::applink) + list(APPEND GANDIVA_OPENSSL_LIBS OpenSSL::applink) endif() set(GANDIVA_SHARED_LINK_LIBS arrow_shared LLVM::LLVM_HEADERS) -set(GANDIVA_SHARED_PRIVATE_LINK_LIBS LLVM::LLVM_LIBS ${GANDIVA_OPENSSL_TARGETS} +set(GANDIVA_SHARED_PRIVATE_LINK_LIBS LLVM::LLVM_LIBS ${GANDIVA_OPENSSL_LIBS} Boost::headers) set(GANDIVA_STATIC_LINK_LIBS arrow_static LLVM::LLVM_HEADERS LLVM::LLVM_LIBS - ${GANDIVA_OPENSSL_TARGETS} + ${GANDIVA_OPENSSL_LIBS} Boost::headers) +if(ARROW_USE_XSIMD) + list(APPEND GANDIVA_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD}) + list(APPEND GANDIVA_STATIC_LINK_LIBS ${ARROW_XSIMD}) +endif() if(ARROW_WITH_RE2) list(APPEND GANDIVA_SHARED_PRIVATE_LINK_LIBS re2::re2) list(APPEND GANDIVA_STATIC_LINK_LIBS re2::re2) @@ -117,8 +121,7 @@ endif() if(ARROW_GANDIVA_STATIC_LIBSTDCPP AND (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX )) - set(GANDIVA_STATIC_LINK_LIBS ${GANDIVA_STATIC_LINK_LIBS} -static-libstdc++ - -static-libgcc) + list(APPEND GANDIVA_STATIC_LINK_LIBS -static-libstdc++ -static-libgcc) endif() # if (MSVC) @@ -130,10 +133,8 @@ endif() # endforeach() # endif() if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT) - set(GANDIVA_VERSION_SCRIPT_FLAGS - "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map") - set(GANDIVA_SHARED_LINK_FLAGS - "${GANDIVA_SHARED_LINK_FLAGS} ${GANDIVA_VERSION_SCRIPT_FLAGS}") + string(APPEND GANDIVA_SHARED_LINK_FLAGS + " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map") endif() add_arrow_lib(gandiva @@ -148,7 +149,6 @@ add_arrow_lib(gandiva OUTPUTS GANDIVA_LIBRARIES DEPENDENCIES - arrow_dependencies precompiled SHARED_LINK_FLAGS ${GANDIVA_SHARED_LINK_FLAGS} @@ -178,16 +178,18 @@ add_dependencies(gandiva ${GANDIVA_LIBRARIES}) arrow_install_all_headers("gandiva") -set(GANDIVA_STATIC_TEST_LINK_LIBS gandiva_static ${ARROW_TEST_LINK_LIBS}) -set(GANDIVA_SHARED_TEST_LINK_LIBS gandiva_shared ${ARROW_TEST_LINK_LIBS} LLVM::LLVM_LIBS) +set(GANDIVA_STATIC_TEST_LINK_LIBS gandiva_static arrow_testing_static) +set(GANDIVA_SHARED_TEST_LINK_LIBS gandiva_shared arrow_testing_shared LLVM::LLVM_LIBS) if(ARROW_WITH_UTF8PROC) list(APPEND GANDIVA_SHARED_TEST_LINK_LIBS utf8proc::utf8proc) list(APPEND GANDIVA_STATIC_TEST_LINK_LIBS utf8proc::utf8proc) endif() if(WIN32) - list(APPEND GANDIVA_STATIC_TEST_LINK_LIBS ${GANDIVA_OPENSSL_TARGETS}) - list(APPEND GANDIVA_SHARED_TEST_LINK_LIBS ${GANDIVA_OPENSSL_TARGETS}) + list(APPEND GANDIVA_STATIC_TEST_LINK_LIBS ${GANDIVA_OPENSSL_LIBS}) + list(APPEND GANDIVA_SHARED_TEST_LINK_LIBS ${GANDIVA_OPENSSL_LIBS}) endif() +list(APPEND GANDIVA_STATIC_TEST_LINK_LIBS ${ARROW_GTEST_GMOCK} ${ARROW_GTEST_GTEST_MAIN}) +list(APPEND 
GANDIVA_SHARED_TEST_LINK_LIBS ${ARROW_GTEST_GMOCK} ${ARROW_GTEST_GTEST_MAIN}) function(ADD_GANDIVA_TEST REL_TEST_NAME) set(options USE_STATIC_LINKING) @@ -265,7 +267,9 @@ add_gandiva_test(internals-test hash_utils_test.cc gdv_function_stubs_test.cc interval_holder_test.cc - tests/test_util.cc) + tests/test_util.cc + EXTRA_LINK_LIBS + re2::re2) add_subdirectory(precompiled) add_subdirectory(tests) diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index e62a8e3d4a375..c092ff4fd011f 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -77,7 +77,8 @@ if(ARROW_BUILD_TESTS) ../decimal_type_util.cc ../decimal_xlarge.cc) target_include_directories(gandiva-precompiled-test PRIVATE ${CMAKE_SOURCE_DIR}/src) - target_link_libraries(gandiva-precompiled-test PRIVATE ${ARROW_TEST_LINK_LIBS}) + target_link_libraries(gandiva-precompiled-test PRIVATE ${ARROW_TEST_LINK_LIBS} + Boost::headers) target_compile_definitions(gandiva-precompiled-test PRIVATE GANDIVA_UNIT_TEST=1 ARROW_STATIC GANDIVA_STATIC) set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/gandiva-precompiled-test") diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 04028431ba157..8be5a88c33c55 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -118,7 +118,7 @@ endfunction() # TODO(wesm): Handling of ABI/SO version if(ARROW_BUILD_STATIC) - set(PARQUET_STATIC_LINK_LIBS arrow_static ${ARROW_STATIC_LINK_LIBS}) + set(PARQUET_STATIC_LINK_LIBS arrow_static) set(PARQUET_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) else() set(PARQUET_STATIC_INSTALL_INTERFACE_LIBS) @@ -227,7 +227,11 @@ if(ARROW_HAVE_RUNTIME_AVX2) endif() endif() +set(PARQUET_SHARED_LINK_LIBS) +set(PARQUET_SHARED_PRIVATE_LINK_LIBS) + if(PARQUET_REQUIRE_ENCRYPTION) + list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS ${ARROW_OPENSSL_LIBS}) set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc encryption/openssl_internal.cc) # Encryption key management @@ -248,11 +252,11 @@ else() endif() if(NOT PARQUET_MINIMAL_DEPENDENCY) - set(PARQUET_SHARED_LINK_LIBS arrow_shared) + list(APPEND PARQUET_SHARED_LINK_LIBS arrow_shared) # These are libraries that we will link privately with parquet_shared (as they # do not need to be linked transitively by other linkers) - set(PARQUET_SHARED_PRIVATE_LINK_LIBS thrift::thrift) + list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS thrift::thrift) # Link publicly with parquet_static (because internal users need to # transitively link all dependencies) @@ -261,6 +265,9 @@ if(NOT PARQUET_MINIMAL_DEPENDENCY) list(APPEND PARQUET_STATIC_INSTALL_INTERFACE_LIBS thrift::thrift) endif() endif() +if(ARROW_WITH_OPENTELEMETRY) + list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) +endif() if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT) set(PARQUET_SHARED_LINK_FLAGS @@ -300,6 +307,15 @@ if(WIN32 AND NOT (ARROW_TEST_LINKAGE STREQUAL "static")) list(APPEND PARQUET_LIBRARIES parquet_test_support) endif() +if(ARROW_TESTING) + add_library(parquet_testing OBJECT test_util.cc) + # Even though this is still just an object library we still need to + # "link" our dependencies so that include paths are configured + # correctly + target_link_libraries(parquet_testing PUBLIC ${ARROW_GTEST_GMOCK}) + list(APPEND PARQUET_TEST_LINK_LIBS parquet_testing) +endif() + if(NOT ARROW_BUILD_SHARED) set(PARQUET_BENCHMARK_LINK_OPTION STATIC_LINK_LIBS parquet_static ${PARQUET_TEST_LINK_LIBS} benchmark::benchmark_main) @@ -354,8 
+370,7 @@ add_parquet_test(internals-test metadata_test.cc page_index_test.cc public_api_test.cc - types_test.cc - test_util.cc) + types_test.cc) set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) @@ -366,28 +381,22 @@ add_parquet_test(reader-test level_conversion_test.cc column_scanner_test.cc reader_test.cc - stream_reader_test.cc - test_util.cc) + stream_reader_test.cc) add_parquet_test(writer-test SOURCES column_writer_test.cc file_serialize_test.cc - stream_writer_test.cc - test_util.cc) + stream_writer_test.cc) add_parquet_test(arrow-test SOURCES arrow/arrow_reader_writer_test.cc arrow/arrow_schema_test.cc - arrow/arrow_statistics_test.cc - test_util.cc) + arrow/arrow_statistics_test.cc) -add_parquet_test(arrow-internals-test - SOURCES - arrow/path_internal_test.cc - arrow/reconstruct_internal_test.cc - test_util.cc) +add_parquet_test(arrow-internals-test SOURCES arrow/path_internal_test.cc + arrow/reconstruct_internal_test.cc) if(PARQUET_REQUIRE_ENCRYPTION) add_parquet_test(encryption-test @@ -395,8 +404,7 @@ if(PARQUET_REQUIRE_ENCRYPTION) encryption/write_configurations_test.cc encryption/read_configurations_test.cc encryption/properties_test.cc - encryption/test_encryption_util.cc - test_util.cc) + encryption/test_encryption_util.cc) add_parquet_test(encryption-key-management-test SOURCES encryption/key_management_test.cc @@ -404,13 +412,12 @@ if(PARQUET_REQUIRE_ENCRYPTION) encryption/key_wrapping_test.cc encryption/test_encryption_util.cc encryption/test_in_memory_kms.cc - encryption/two_level_cache_with_expiration_test.cc - test_util.cc) + encryption/two_level_cache_with_expiration_test.cc) endif() # Those tests need to use static linking as they access thrift-generated # symbols which are not exported by parquet.dll on Windows (PARQUET-1420). -add_parquet_test(file_deserialize_test SOURCES file_deserialize_test.cc test_util.cc) +add_parquet_test(file_deserialize_test SOURCES file_deserialize_test.cc) add_parquet_test(schema_test) add_parquet_benchmark(bloom_filter_benchmark SOURCES bloom_filter_benchmark.cc @@ -422,27 +429,3 @@ add_parquet_benchmark(level_conversion_benchmark) add_parquet_benchmark(page_index_benchmark SOURCES page_index_benchmark.cc benchmark_util.cc) add_parquet_benchmark(arrow/reader_writer_benchmark PREFIX "parquet-arrow") - -if(ARROW_WITH_BROTLI) - add_definitions(-DARROW_WITH_BROTLI) -endif() - -if(ARROW_WITH_BZ2) - add_definitions(-DARROW_WITH_BZ2) -endif() - -if(ARROW_WITH_LZ4) - add_definitions(-DARROW_WITH_LZ4) -endif() - -if(ARROW_WITH_SNAPPY) - add_definitions(-DARROW_WITH_SNAPPY) -endif() - -if(ARROW_WITH_ZLIB) - add_definitions(-DARROW_WITH_ZLIB) -endif() - -if(ARROW_WITH_ZSTD) - add_definitions(-DARROW_WITH_ZSTD) -endif() diff --git a/cpp/src/parquet/column_io_benchmark.cc b/cpp/src/parquet/column_io_benchmark.cc index 48e434a342e72..593765dcd4e0b 100644 --- a/cpp/src/parquet/column_io_benchmark.cc +++ b/cpp/src/parquet/column_io_benchmark.cc @@ -20,6 +20,7 @@ #include "arrow/array.h" #include "arrow/io/memory.h" #include "arrow/testing/random.h" +#include "arrow/util/config.h" #include "parquet/column_reader.h" #include "parquet/column_writer.h" diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index ac4627d69c0f6..3fb224154c4ec 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1478,17 +1478,9 @@ class TypedRecordReader : public TypedColumnReaderImpl, // We skipped the levels by incrementing 'levels_position_'. 
For values // we do not have a buffer, so we need to read them and throw them away. // First we need to figure out how many present/not-null values there are. - std::shared_ptr<::arrow::ResizableBuffer> valid_bits; - valid_bits = AllocateBuffer(this->pool_); - PARQUET_THROW_NOT_OK(valid_bits->Resize(bit_util::BytesForBits(skipped_records), - /*shrink_to_fit=*/true)); - ValidityBitmapInputOutput validity_io; - validity_io.values_read_upper_bound = skipped_records; - validity_io.valid_bits = valid_bits->mutable_data(); - validity_io.valid_bits_offset = 0; - DefLevelsToBitmap(def_levels() + start_levels_position, skipped_records, - this->leaf_info_, &validity_io); - int64_t values_to_read = validity_io.values_read - validity_io.null_count; + int64_t values_to_read = + std::count(def_levels() + start_levels_position, def_levels() + levels_position_, + this->max_def_level_); // Now that we have figured out number of values to read, we do not need // these levels anymore. We will remove these values from the buffer. diff --git a/cpp/src/parquet/column_reader_benchmark.cc b/cpp/src/parquet/column_reader_benchmark.cc index 61fe397cf1c30..93ab2dfa8c1ac 100644 --- a/cpp/src/parquet/column_reader_benchmark.cc +++ b/cpp/src/parquet/column_reader_benchmark.cc @@ -56,6 +56,7 @@ class BenchmarkHelper { for (const auto& page : pages_) { total_size_ += page->size(); } + total_levels_ = static_cast(num_pages) * levels_per_page; } Int32Reader* ResetColumnReader() { @@ -80,6 +81,8 @@ class BenchmarkHelper { int64_t total_size() const { return total_size_; } + int64_t total_levels() const { return total_levels_; } + private: std::vector> pages_; std::unique_ptr descr_; @@ -88,6 +91,7 @@ class BenchmarkHelper { // Reader for record reader benchmarks. std::shared_ptr record_reader_; int64_t total_size_ = 0; + int64_t total_levels_ = 0; }; // Benchmarks Skip for ColumnReader with the following parameters in order: @@ -165,6 +169,7 @@ static void RecordReaderReadRecords(::benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * helper.total_size()); + state.SetItemsProcessed(state.iterations() * helper.total_levels()); } // Benchmarks SkipRecords for RecordReader with the following parameters in order: @@ -190,6 +195,40 @@ static void RecordReaderSkipRecords(::benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * helper.total_size()); + state.SetItemsProcessed(state.iterations() * helper.total_levels()); +} + +// Benchmarks ReadRecords and SkipRecords for RecordReader with the following parameters +// in order: +// - repetition: 0 for REQUIRED, 1 for OPTIONAL, 2 for REPEATED. +// - batch_size: sets how many values to read/skip at each call. +// - levels_per_page: sets how many levels to read/skip in total. +static void RecordReaderReadAndSkipRecords(::benchmark::State& state) { + const auto repetition = static_cast(state.range(0)); + const auto batch_size = static_cast(state.range(1)); + const auto levels_per_page = static_cast(state.range(2)); + + BenchmarkHelper helper(repetition, /*num_pages=*/16, levels_per_page); + + // Vectors to read the values into. + for (auto _ : state) { + state.PauseTiming(); + // read_dense_for_nullable should not matter for skip. + RecordReader* reader = helper.ResetRecordReader(/*read_dense_for_nullable=*/false); + int64_t records_read = -1; + int64_t records_skipped = -1; + state.ResumeTiming(); + while (records_read != 0 && records_skipped != 0) { + // ReadRecords may buffer some levels which will be skipped by the following + // SkipRecords. 
+ DoNotOptimize(records_read = reader->ReadRecords(batch_size)); + DoNotOptimize(records_skipped = reader->SkipRecords(batch_size)); + reader->Reset(); + } + } + + state.SetBytesProcessed(state.iterations() * helper.total_size()); + state.SetItemsProcessed(state.iterations() * helper.total_levels()); } BENCHMARK(ColumnReaderSkipInt32) @@ -219,6 +258,18 @@ BENCHMARK(RecordReaderReadRecords) ->Args({2, 1000, true}) ->Args({2, 1000, false}); +BENCHMARK(RecordReaderReadAndSkipRecords) + ->ArgNames({"Repetition", "BatchSize", "LevelsPerPage"}) + ->Args({0, 10, 80000}) + ->Args({0, 1000, 80000}) + ->Args({0, 10000, 1000000}) + ->Args({1, 10, 80000}) + ->Args({1, 1000, 80000}) + ->Args({1, 10000, 1000000}) + ->Args({2, 10, 80000}) + ->Args({2, 100, 80000}) + ->Args({2, 10000, 1000000}); + void GenerateLevels(int level_repeats, int max_level, int num_levels, std::vector* levels) { // Generate random levels diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index e2cc24502af5d..a48573966a905 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -1607,6 +1607,33 @@ TEST_P(ByteArrayRecordReaderTest, ReadAndSkipOptional) { record_reader_->Reset(); } +// Test skipping buffered records when reading/skipping more than kMinLevelBatchSize +// levels at a time. +TEST_P(ByteArrayRecordReaderTest, ReadAndBatchSkipOptional) { + MakeRecordReader(/*levels_per_page=*/9000, /*num_pages=*/1); + + // Read 100 records and buffer some records. + ASSERT_EQ(record_reader_->ReadRecords(/*num_records=*/100), 100); + CheckReadValues(0, 100); + record_reader_->Reset(); + + // Skip 3000 records. The buffered records will be skipped. + ASSERT_EQ(record_reader_->SkipRecords(/*num_records=*/3000), 3000); + + // Read 900 records and buffer some records again. + ASSERT_EQ(record_reader_->ReadRecords(/*num_records=*/900), 900); + CheckReadValues(3100, 4000); + record_reader_->Reset(); + + // Skip 3000 records. The buffered records will be skipped. + ASSERT_EQ(record_reader_->SkipRecords(/*num_records=*/3000), 3000); + + // Read 3000 records. Only 2000 records are left to be read. + ASSERT_EQ(record_reader_->ReadRecords(/*num_records=*/3000), 2000); + CheckReadValues(7000, 9000); + record_reader_->Reset(); +} + // Tests reading and skipping an optional FLBA field. // The binary readers only differ in DecodeDense and DecodeSpaced functions, so // testing optional is sufficient in exercising those code paths. 
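The `SkipRecords` rewrite in column_reader.cc above (exercised by these new tests and benchmarks) rests on one invariant: a definition level equal to the leaf's maximum definition level means a value is physically present, so counting such levels yields the number of buffered values to read and discard, with no validity bitmap needed. A self-contained sketch of the idea with toy levels (not actual Parquet internals):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // For an optional leaf whose max definition level is 2, a level of 2
  // means "value present"; any lower level means null or an empty parent.
  const int16_t max_def_level = 2;
  const std::vector<int16_t> def_levels = {2, 1, 2, 0, 2, 2, 1};

  // Equivalent in spirit to the std::count over the skipped level range.
  const int64_t values_to_read =
      std::count(def_levels.begin(), def_levels.end(), max_def_level);

  std::cout << "values to read and discard: " << values_to_read << "\n";  // 4
  return 0;
}
```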
diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 86fe0965a6a7f..a8519a0f56861 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -26,6 +26,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_builders.h" +#include "arrow/util/config.h" #include "parquet/column_page.h" #include "parquet/column_reader.h" diff --git a/cpp/src/parquet/file_deserialize_test.cc b/cpp/src/parquet/file_deserialize_test.cc index 6b3c7062fcc4a..9f2857c8194c7 100644 --- a/cpp/src/parquet/file_deserialize_test.cc +++ b/cpp/src/parquet/file_deserialize_test.cc @@ -37,6 +37,7 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/compression.h" +#include "arrow/util/config.h" #include "arrow/util/crc32.h" namespace parquet { diff --git a/cpp/src/parquet/file_serialize_test.cc b/cpp/src/parquet/file_serialize_test.cc index 62e1965418076..fc356d5d24c1c 100644 --- a/cpp/src/parquet/file_serialize_test.cc +++ b/cpp/src/parquet/file_serialize_test.cc @@ -19,6 +19,7 @@ #include #include "arrow/testing/gtest_compat.h" +#include "arrow/util/config.h" #include "parquet/column_reader.h" #include "parquet/column_writer.h" diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 551f62798e3b5..f9c2e06873a22 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -32,6 +32,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/config.h" #include "parquet/column_reader.h" #include "parquet/column_scanner.h" diff --git a/cpp/src/skyhook/CMakeLists.txt b/cpp/src/skyhook/CMakeLists.txt index 67fddec135f99..1b41e340ebd12 100644 --- a/cpp/src/skyhook/CMakeLists.txt +++ b/cpp/src/skyhook/CMakeLists.txt @@ -43,8 +43,11 @@ add_arrow_lib(arrow_skyhook ARROW_SKYHOOK_CLIENT_LIBRARIES SHARED_LINK_LIBS ${ARROW_SKYHOOK_LINK_SHARED} + SHARED_PRIVATE_LINK_LIBS + arrow::flatbuffers STATIC_LINK_LIBS - ${ARROW_SKYHOOK_LINK_STATIC}) + ${ARROW_SKYHOOK_LINK_STATIC} + arrow::flatbuffers) # define the cls library add_arrow_lib(cls_skyhook @@ -54,6 +57,8 @@ add_arrow_lib(cls_skyhook ARROW_SKYHOOK_CLS_LIBRARIES SHARED_LINK_LIBS ${ARROW_SKYHOOK_LINK_SHARED} + SHARED_PRIVATE_LINK_LIBS + arrow::flatbuffers STATIC_LINK_LIBS ${ARROW_SKYHOOK_LINK_STATIC}) diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj index 47b9db2acb155..9a3cf190cc376 100644 --- a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj +++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 214553ad1ed22..81492462d0ffe 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -9,7 +9,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index d752c077c5521..df9393515c638 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -9,7 +9,7 
@@ - + diff --git a/dev/README.md b/dev/README.md index 24600a67db8c3..db9a10d527334 100644 --- a/dev/README.md +++ b/dev/README.md @@ -108,25 +108,6 @@ Status closed URL https://github.com/apache/arrow/issues/Y ``` -## Verifying Release Candidates - -We have provided a script to assist with verifying release candidates on Linux -and macOS: - -```shell -bash dev/release/verify-release-candidate.sh 0.7.0 0 -``` - -Read the script and check the notes in dev/release for information about system -dependencies. - -On Windows, we have a script that verifies C++ and Python (requires Visual -Studio 2015): - -``` -dev/release/verify-release-candidate.bat apache-arrow-0.7.0.tar.gz -``` - # Integration testing Build the following base image used by multiple tests: diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 57e91e206748d..c85f1f754b997 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -199,7 +199,7 @@ def credentials(self, url, username_from_url, allowed_types): raise CrossbowError(msg) if (allowed_types & - pygit2.credentials.GIT_CREDENTIAL_USERPASS_PLAINTEXT): + pygit2.credentials.CredentialType.USERPASS_PLAINTEXT): return pygit2.UserPass('x-oauth-basic', self.token) else: return None diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py index 162f73ec0ffe0..20d9a16138bac 100644 --- a/dev/archery/archery/docker/cli.py +++ b/dev/archery/archery/docker/cli.py @@ -215,7 +215,7 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, archery docker run --no-cache conda-python # pass a docker-compose parameter, like the python version - PYTHON=3.8 archery docker run conda-python + PYTHON=3.12 archery docker run conda-python # disable the cache only for the leaf image PANDAS=upstream_devel archery docker run --no-leaf-cache \ diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index e984468bc5052..3525ae0be56a5 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -611,7 +611,7 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True, Scenario( "session_options", description="Ensure Flight SQL Sessions work as expected.", - skip_testers={"JS", "C#", "Rust", "Go"} + skip_testers={"JS", "C#", "Rust"} ), Scenario( "poll_flight_info", diff --git a/dev/archery/setup.py b/dev/archery/setup.py index 2ecc72e04e8aa..23a1600910d04 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -21,6 +21,10 @@ import sys from setuptools import setup, find_packages +# pygit2>=1.14.0 requires python 3.9, so crossbow and all +# both technically require python 3.9 — however we still need to +# support 3.8 when using docker. When 3.8 is EOLed and we bump +# to Python 3.9 this will resolve itself. 
if sys.version_info < (3, 8): sys.exit('Python < 3.8 is not supported') @@ -29,7 +33,7 @@ extras = { 'benchmark': ['pandas'], - 'crossbow': ['github3.py', jinja_req, 'pygit2>=1.6.0', 'requests', + 'crossbow': ['github3.py', jinja_req, 'pygit2>=1.14.0', 'requests', 'ruamel.yaml', 'setuptools_scm<8.0.0'], 'crossbow-upload': ['github3.py', jinja_req, 'ruamel.yaml', 'setuptools_scm'], diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index b9e6a8505b72b..149a2b27ac94a 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -166,7 +166,7 @@ def test_vote [10]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0 [11]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ [12]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md -[13]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates +[13]: https://arrow.apache.org/docs/developers/release_verification.html [14]: #{verify_pr_url || "null"} VOTE end diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh index a3441b23bf539..1bd3c0e19e04e 100755 --- a/dev/release/02-source.sh +++ b/dev/release/02-source.sh @@ -202,7 +202,7 @@ The vote will be open for at least 72 hours. [10]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc} [11]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ [12]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md -[13]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates +[13]: https://arrow.apache.org/docs/developers/release_verification.html [14]: ${verify_pr_url} MAIL echo "---------------------------------------------------------" diff --git a/dev/release/README.md b/dev/release/README.md index e1ecdd4332292..ce1eb82d7eba3 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -19,8 +19,6 @@ ## Release management scripts -To learn more, see the project wiki: +To learn more, see the project documentation: -https://cwiki.apache.org/confluence/display/ARROW/Release+Management+Guide - -and [VERIFY.md](VERIFY.md) +https://arrow.apache.org/docs/developers/release.html diff --git a/dev/release/VERIFY.md b/dev/release/VERIFY.md deleted file mode 100644 index 433e6fcb832fc..0000000000000 --- a/dev/release/VERIFY.md +++ /dev/null @@ -1,113 +0,0 @@ - - -# Verifying Arrow releases - -## Windows - -We've provided a convenience script for verifying the C++ and Python builds on -Windows. Read the comments in `verify-release-candidate.bat` for instructions. - -## Linux and macOS - -We've provided a convenience script for verifying the C++, C#, C GLib, Go, -Java, JavaScript, Ruby and Python builds on Linux and macOS. Read the script -`verify-release-candidate.sh` for further information. 
- -### C GLib - -You need the followings to verify C GLib build: - - * GLib - * GObject Introspection - * Ruby (not EOL-ed version is required) - * gobject-introspection gem - * test-unit gem - -You can install them by the followings on Debian GNU/Linux and Ubuntu: - -```console -% sudo apt install -y -V libgirepository1.0-dev ruby-dev -% sudo gem install gobject-introspection test-unit -``` - -You can install them by the followings on CentOS 7: - -```console -% sudo yum install -y gobject-introspection-devel -% git clone https://github.com/sstephenson/rbenv.git ~/.rbenv -% git clone https://github.com/sstephenson/ruby-build.git ~/.rbenv/plugins/ruby-build -% echo 'export PATH="$HOME/.rbenv/bin:$PATH"' >> ~/.bash_profile -% echo 'eval "$(rbenv init -)"' >> ~/.bash_profile -% exec ${SHELL} --login -% sudo yum install -y gcc make patch openssl-devel readline-devel zlib-devel -% rbenv install 2.4.2 -% rbenv global 2.4.2 -% gem install gobject-introspection test-unit -``` - -You can install them by the followings on macOS: - -```console -% brew install -y gobject-introspection -% gem install gobject-introspection test-unit -``` - -You need to set `PKG_CONFIG_PATH` to find libffi on macOS: - -```console -% export PKG_CONFIG_PATH=$(brew --prefix libffi)/lib/pkgconfig:$PKG_CONFIG_PATH -``` - -### C++, C#, C GLib, Go, Java, JavaScript, Python, Ruby - -Example scripts to install the dependencies to run the verification -script for verifying the source on Ubuntu 20.04, Rocky Linux 8 and -AlmaLinux 8 are in this folder and named `setup-ubuntu.sh` and -`setup-rhel-rebuilds.sh`. These can be adapted to different -situations. Go and JavaScript are installed by the verification -script in the testing environment. Verifying the apt and yum binaries -additionally requires installation of Docker. - -When verifying the source, by default the verification script will try -to verify all implementations and bindings. Should one of the -verification tests fail, the script will exit before running the other -tests. It can be helpful to repeat the failed test to see if it will -complete, since failures can occur for problems such as slow or failed -download of a dependency from the internet. It is possible to run -specific verification tests by setting environment variables, for example - -```console -% TEST_DEFAULT=0 TEST_SOURCE=1 dev/release/verify-release-candidate.sh 6.0.0 3 -% TEST_DEFAULT=0 TEST_BINARIES=1 dev/release/verify-release-candidate.sh 6.0.0 3 -% TEST_DEFAULT=0 TEST_GO=1 dev/release/verify-release-candidate.sh 6.0.0 3 -% TEST_DEFAULT=0 TEST_YUM=1 dev/release/verify-release-candidate.sh 6.0.0 3 -``` - -It is also possible to use -[Archery](https://arrow.apache.org/docs/developers/archery.html) to run -the verification process in a container, for example - -```console -% archery docker run -e VERIFY_VERSION=6.0.1 -e VERIFY_RC=1 almalinux-verify-rc-source -% archery docker run -e VERIFY_VERSION=6.0.1 -e VERIFY_RC=1 ubuntu-verify-rc-source -``` - -To improve software quality, you are encouraged to verify -on a variety of platforms. 
diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 086d1fdbe811f..8fe5878254dde 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -90,7 +90,7 @@ jobs: uses: actions/setup-python@v4 with: cache: 'pip' - python-version: 3.8 + python-version: 3.12 - name: Install Archery shell: bash run: pip install -e arrow/dev/archery[all] diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index 8ba95af46af56..a190eea459c56 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -59,7 +59,7 @@ on: uses: actions/setup-python@v4 with: cache: 'pip' - python-version: 3.8 + python-version: 3.12 - name: Set up Python by apt if: runner.os == 'Linux' && runner.arch != 'X64' run: | @@ -85,7 +85,7 @@ on: if: runner.arch == 'X64' uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.12 - name: Set up Python by apt if: runner.os == 'Linux' && runner.arch != 'X64' run: | diff --git a/dev/tasks/r/github.macos-linux.local.yml b/dev/tasks/r/github.macos-linux.local.yml index 045c387b73f60..b221e8c5d8d5b 100644 --- a/dev/tasks/r/github.macos-linux.local.yml +++ b/dev/tasks/r/github.macos-linux.local.yml @@ -58,6 +58,18 @@ jobs: - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true + # CRAN builders have the entire bin here added to the path. This sometimes + # includes things like GNU libtool which name-collide with what we expect + - name: Add R.framework/Resources/bin to the path + if: contains(matrix.os, 'macOS') + run: echo "/Library/Frameworks/R.framework/Resources/bin" >> $GITHUB_PATH + - name : Check whether libtool in R is used + if: contains(matrix.os, 'macOS') + run: | + if [ "$(which libtool)" != "/Library/Frameworks/R.framework/Resources/bin/libtool" ]; then + echo "libtool provided by R isn't found: $(which libtool)" + exit 1 + fi - name: Install dependencies uses: r-lib/actions/setup-r-dependencies@v2 with: diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index cfc333c6b22f5..c166ea15f3761 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -231,7 +231,7 @@ tasks: # # * On conda-forge the `pyarrow` and `arrow-cpp` packages are built in # the same feedstock as the dependency matrix is the same for them as - # Python and the OS are the main dimension. + # Python and the OS are the main dimension. # * The files in `dev/tasks/conda-recipes/.ci_support/` are automatically # generated and to be synced regularly from the feedstock. We have no way # yet to generate them inside the arrow repository automatically. @@ -1055,7 +1055,7 @@ tasks: params: image: conda-cpp-valgrind -{% for ubuntu_version in ["20.04", "22.04"] %} +{% for ubuntu_version in ["20.04", "22.04", "24.04"] %} test-ubuntu-{{ ubuntu_version }}-cpp: ci: github template: docker-tests/github.linux.yml @@ -1073,6 +1073,18 @@ tasks: UBUNTU: 20.04 image: ubuntu-cpp-bundled + test-ubuntu-24.04-cpp-gcc-14: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: "24.04" + GCC_VERSION: 14 + # rapidjson 1.1.0 has an error caught by gcc 14. 
+ # https://github.com/Tencent/rapidjson/issues/718 + flags: -e CC=gcc-14 -e CXX=g++-14 -e RapidJSON_SOURCE=BUNDLED + image: ubuntu-cpp + test-skyhook-integration: ci: github template: docker-tests/github.linux.yml @@ -1281,8 +1293,8 @@ tasks: ci: azure template: r/azure.linux.yml params: - r_org: library - r_image: r-base + r_org: rocker + r_image: r-ver r_tag: latest flags: '-e ARROW_DEPENDENCY_SOURCE=BUNDLED' @@ -1297,8 +1309,8 @@ tasks: ci: azure template: r/azure.linux.yml params: - r_org: library - r_image: r-base + r_org: rocker + r_image: r-ver r_tag: latest flags: '-e ARROW_OFFLINE_BUILD=true' @@ -1324,7 +1336,7 @@ tasks: r_custom_ccache: true {% for r_org, r_image, r_tag in [("rhub", "ubuntu-gcc-release", "latest"), - ("library", "r-base", "latest"), + ("rocker", "r-ver", "latest"), ("rstudio", "r-base", "4.2-focal"), ("rstudio", "r-base", "4.1-opensuse153")] %} test-r-{{ r_org }}-{{ r_image }}-{{ r_tag }}: @@ -1391,8 +1403,8 @@ tasks: ci: azure template: r/azure.linux.yml params: - r_org: library - r_image: r-base + r_org: rocker + r_image: r-ver r_tag: latest flags: "-e LIBARROW_MINIMAL=TRUE" diff --git a/docker-compose.yml b/docker-compose.yml index aec685775aab1..26a42fa13947b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -373,7 +373,7 @@ services: # docker-compose run --rm ubuntu-cpp # Parameters: # ARCH: amd64, arm64v8, s390x, ... - # UBUNTU: 20.04, 22.04 + # UBUNTU: 20.04, 22.04, 24.04 image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp build: context: . diff --git a/docs/source/developers/index.rst b/docs/source/developers/index.rst index 83dc556e1605a..fa63f66516e37 100644 --- a/docs/source/developers/index.rst +++ b/docs/source/developers/index.rst @@ -211,6 +211,13 @@ All participation in the Apache Arrow project is governed by the ASF's To learn about the detailed information on the steps followed to perform a release, see :ref:`release`. +.. dropdown:: Release Verification Process + :animate: fade-in-slide-down + :class-title: sd-fs-5 + :class-container: sd-shadow-none + + To learn how to verify a release, see :ref:`release_verification`. + .. toctree:: :maxdepth: 2 :hidden: @@ -226,3 +233,4 @@ All participation in the Apache Arrow project is governed by the ASF's benchmarks documentation release + release_verification diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index 0ff8e3a824ffc..1ecf747e36379 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -200,6 +200,8 @@ Verify the Release # on dev@arrow.apache.org. To regenerate the email template use SOURCE_DEFAULT=0 SOURCE_VOTE=1 dev/release/02-source.sh +See :ref:`release_verification` for details. + Voting and approval =================== diff --git a/docs/source/developers/release_verification.rst b/docs/source/developers/release_verification.rst new file mode 100644 index 0000000000000..53c8f54e5b5bd --- /dev/null +++ b/docs/source/developers/release_verification.rst @@ -0,0 +1,144 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. 
software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _release_verification:
+
+============================
+Release Verification Process
+============================
+
+This page provides detailed information on the steps followed to perform
+a release verification on the major platforms.
+
+Principles
+==========
+
+The Apache Arrow Release Approval process follows the guidelines defined at the
+`Apache Software Foundation Release Approval <https://www.apache.org/legal/release-policy.html#release-approval>`_.
+
+For a release vote to pass, a minimum of three positive binding votes and more
+positive binding votes than negative binding votes MUST be cast.
+Releases may not be vetoed. Votes cast by PMC members are binding; however,
+non-binding votes are greatly encouraged and a sign of a healthy project.
+
+Running the release verification
+================================
+
+Linux and macOS
+---------------
+
+To run the verification script for either the source release or the binary
+artifacts, follow these guidelines:
+
+.. code-block::
+
+   # this will create and automatically clean up a temporary directory for the verification environment and will run the source verification
+   TEST_DEFAULT=0 TEST_SOURCE=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM
+
+   # this will create and automatically clean up a temporary directory for the verification environment and will run the binary verification
+   TEST_DEFAULT=0 TEST_BINARIES=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM
+
+   # to verify only certain implementations, use TEST_DEFAULT=0 and the TEST_* variables
+   # here are a couple of examples, but see the source code for the available options
+   TEST_DEFAULT=0 TEST_CPP=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM # only C++ tests
+   TEST_DEFAULT=0 TEST_CPP=1 TEST_PYTHON=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM # C++ and Python tests
+   TEST_DEFAULT=0 TEST_INTEGRATION_CPP=1 TEST_INTEGRATION_JAVA=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM # C++ and Java integration tests
+
+   # to verify certain binaries, use the TEST_* variables as:
+   TEST_DEFAULT=0 TEST_WHEELS=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM # only Wheels
+   TEST_DEFAULT=0 TEST_APT=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM # only APT packages
+   TEST_DEFAULT=0 TEST_YUM=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM # only YUM packages
+   TEST_DEFAULT=0 TEST_JARS=1 dev/release/verify-release-candidate.sh $VERSION $RC_NUM # only JARs
+
+Windows
+-------
+
+To run the verification script on Windows, first download the source tarball
+that you wish to verify from the SVN dist system:
+
+.. code-block::
+
+   dev\release\verify-release-candidate.bat %VERSION% %RC_NUM%
+
+System Configuration Instructions
+=================================
+
+You will need some common tools installed, such as curl and git.
+
+Ubuntu
+------
+
+You might have to install some packages on your system. The following
+utility script can be used to set up your Ubuntu system; it will install
+the packages required to perform a source verification on a clean
+Ubuntu:
+
+.. code-block::
+
+   # From the arrow clone
+   sudo dev/release/setup-ubuntu.sh
+
+macOS ARM
+---------
+
+.. code-block::
+
+   # From the arrow clone
+   brew install gpg
+   brew bundle --file=cpp/Brewfile
+   brew bundle --file=c_glib/Brewfile
+   brew uninstall node
+   # You might need to add node, ruby, java and maven to the PATH; follow
+   # the instructions from brew after installing.
+   brew install node@20
+   brew install ruby
+   brew install openjdk
+   brew install maven
+
+Windows 11
+----------
+
+To be defined
+
+Casting your vote
+=================
+
+Once you have performed the verification, you can cast your vote by responding
+to the vote thread on dev@arrow.apache.org and supplying your result.
+
+If the verification was successful, you can send your +1 vote. Along with the
+vote, we usually include the commands that were executed and the local versions
+used. As an example:
+
+.. code-block::
+
+   +1
+
+   I've successfully verified the sources and binaries with:
+
+   TEST_DEFAULT=0 TEST_SOURCE=1 dev/release/verify-release-candidate.sh 15.0.0 1
+   TEST_DEFAULT=0 TEST_BINARIES=1 dev/release/verify-release-candidate.sh 15.0.0 1
+   with:
+   * Python 3.10.12
+   * gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+   * NVIDIA CUDA Build cuda_11.5.r11.5/compiler.30672275_0
+   * openjdk version "17.0.9" 2023-10-17
+   * ruby 3.0.2p107 (2021-07-07 revision 0db68f0233) [x86_64-linux-gnu]
+   * dotnet 7.0.115
+   * Ubuntu 22.04 LTS
+
+If there were any issues during verification, please report them on the
+mail thread so they can be diagnosed.
diff --git a/docs/source/format/CDataInterface.rst b/docs/source/format/CDataInterface.rst
index ef4bf1cf3238d..fd9952b037e75 100644
--- a/docs/source/format/CDataInterface.rst
+++ b/docs/source/format/CDataInterface.rst
@@ -467,7 +467,10 @@ It has the following fields:
   Mandatory. The number of physical buffers backing this array. The number
   of buffers is a function of the data type, as described in the
-  :ref:`Columnar format specification `.
+  :ref:`Columnar format specification `, except for
+  the binary or utf-8 view type, which has one additional buffer compared
+  to the Columnar format specification (see
+  :ref:`c-data-interface-binary-view-arrays`).
 
   Buffers of children arrays are not included.
diff --git a/docs/source/format/CDataInterface.rst b/docs/source/format/CDataInterface.rst
@@ -552,6 +555,8 @@ parameterized extension types).
 The ``ArrowArray`` structure exported from an extension array simply points to
 the storage data of the extension array.
 
+.. _c-data-interface-binary-view-arrays:
+
 Binary view arrays
 ------------------
diff --git a/docs/source/format/CanonicalExtensions.rst b/docs/source/format/CanonicalExtensions.rst
index 86cfab718dd3c..1f055b7f8edb5 100644
--- a/docs/source/format/CanonicalExtensions.rst
+++ b/docs/source/format/CanonicalExtensions.rst
@@ -25,7 +25,7 @@ Canonical Extension Types
 Introduction
 ============
 
-The Arrow Columnar Format allows defining
+The Arrow columnar format allows defining
 :ref:`extension types ` so as to extend standard Arrow data types
 with custom semantics. Often these semantics will be specific to a
 system or application. However, it is beneficial
diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst
index 56cb27626a1f9..7b74b972f2ab8 100644
--- a/docs/source/format/Columnar.rst
+++ b/docs/source/format/Columnar.rst
@@ -23,7 +23,10 @@ Arrow Columnar Format
 
 *Version: 1.4*
 
-The "Arrow Columnar Format" includes a language-agnostic in-memory
+.. seealso:: :ref:`Additions to the Arrow columnar format since version 1.0.0
+   <post-1-0-0-format-versions>`
+
+The **Arrow columnar format** includes a language-agnostic in-memory
 data structure specification, metadata serialization, and a protocol
 for serialization and generic data transport.
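As a worked illustration of the buffer-count rule added to CDataInterface.rst above (a sketch; the helper function and the numbers are illustrative, not part of the patch): a utf-8/binary view array exported through the C data interface carries one extra trailing buffer, holding the sizes of the variadic data buffers, on top of the buffers the columnar specification lists.

```go
package main

import "fmt"

// cDataNBuffers sketches the expected ArrowArray.n_buffers value for a
// utf-8/binary view array (hypothetical helper, mirroring the prose above):
// the columnar spec counts a validity bitmap, the views buffer and N variadic
// data buffers; the C data interface appends one more buffer that stores the
// sizes of those N variadic buffers.
func cDataNBuffers(nVariadicDataBuffers int) int {
	columnarBuffers := 2 + nVariadicDataBuffers // validity + views + data buffers
	return columnarBuffers + 1                  // + trailing variadic buffer sizes
}

func main() {
	// A Utf8View array backed by two character-data buffers:
	fmt.Println(cDataNBuffers(2)) // 5 via the C data interface, vs. 4 in the columnar spec
}
```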
@@ -359,6 +362,8 @@ will be represented as follows: :: |----------------|-----------------------| | joemark | unspecified (padding) | +.. _variable-size-binary-view-layout: + Variable-size Binary View Layout -------------------------------- @@ -404,6 +409,9 @@ All integers (length, buffer index, and offset) are signed. This layout is adapted from TU Munich's `UmbraDB`_. +Note that this layout uses one additional buffer to store the variadic buffer +lengths in the :ref:`Arrow C data interface `. + .. _variable-size-list-layout: Variable-size List Layout @@ -499,9 +507,13 @@ will be represented as follows: :: |-------------------------------|-----------------------| | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | unspecified (padding) | +.. _listview-layout: + ListView Layout ~~~~~~~~~~~~~~~ +.. versionadded:: Arrow Columnar Format 1.4 + The ListView layout is defined by three buffers: a validity bitmap, an offsets buffer, and an additional sizes buffer. Sizes and offsets have the identical bit width and both 32-bit and 64-bit signed integer options are supported. @@ -957,6 +969,8 @@ below. Run-End Encoded Layout ---------------------- +.. versionadded:: Arrow Columnar Format 1.3 + Run-end encoding (REE) is a variation of run-length encoding (RLE). These encodings are well-suited for representing data containing sequences of the same value, called runs. In run-end encoding, each run is represented as a @@ -1232,9 +1246,13 @@ bytes. Since this metadata can be used to communicate in-memory pointer addresses between libraries, it is recommended to set ``size`` to the actual memory size rather than the padded size. +.. _variadic-buffers: + Variadic buffers ---------------- +.. versionadded:: Arrow Columnar Format 1.4 + Some types such as Utf8View are represented using a variable number of buffers. For each such Field in the pre-ordered flattened logical schema, there will be an entry in ``variadicBufferCounts`` to indicate the number of variadic buffers diff --git a/docs/source/format/Versioning.rst b/docs/source/format/Versioning.rst index 4158f6a003e53..7ba01107074d0 100644 --- a/docs/source/format/Versioning.rst +++ b/docs/source/format/Versioning.rst @@ -15,8 +15,9 @@ .. specific language governing permissions and limitations .. under the License. +******************************* Format Versioning and Stability -=============================== +******************************* Starting with version 1.0.0, Apache Arrow uses **two versions** to describe each release of the project: @@ -32,7 +33,7 @@ changes. From 1.0.0 onward, we follow `Semantic Versioning expect most releases to be major library releases. Backward Compatibility ----------------------- +====================== A newer versioned client library will be able to read any data and metadata produced by an older client library. @@ -41,7 +42,7 @@ So long as the **major** format version is not changed, a newer library is backward compatible with an older library. Forward Compatibility ---------------------- +===================== An older client library must be able to either read data generated from a new client library or detect that it cannot properly read the @@ -53,7 +54,7 @@ available in 1.0.0. So long as these features are not used (such as a new logical data type), forward compatibility is preserved. Long-Term Stability -------------------- +=================== A change in the format major version (e.g. from 1.0.0 to 2.0.0) indicates a disruption to these compatibility guarantees in some way. 
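To make the ``variadicBufferCounts`` wording added to Columnar.rst above concrete, here is a small sketch (the schema and buffer counts are assumed for illustration and do not come from the patch): only view-typed fields of the pre-ordered flattened schema contribute an entry, in schema order.

```go
package main

import "fmt"

func main() {
	// Assumed flattened schema: a int32, b utf8view, c utf8view,
	// where b owns two variadic data buffers and c owns three.
	// Non-view fields such as a contribute no entry.
	variadicBufferCounts := []int64{
		2, // b: two variadic character-data buffers
		3, // c: three variadic character-data buffers
	}
	fmt.Println(variadicBufferCounts) // one count per view field, in schema order
}
```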
@@ -63,9 +64,44 @@ event and, should this come to pass, we would exercise caution in ensuring that production applications are not harmed. Pre-1.0.0 Versions ------------------- +================== We made no forward or backward compatibility guarantees for versions prior to 1.0.0. However, we made every effort to ensure that new clients can read serialized data produced by library version 0.8.0 and onward. + +.. _post-1-0-0-format-versions: + +Post-1.0.0 Format Versions +========================== + +Since version 1.0.0, there have been four new minor versions and zero new +major versions of the Arrow format. Each new minor version added new features. +When these new features are not used, the new minor format versions are +compatible with format version 1.0.0. The new features added in each minor +format version since 1.0.0 are as follows: + +Version 1.1 +----------- + +* Added 256-bit Decimal type. + +Version 1.2 +----------- + +* Added MonthDayNano interval type. + +Version 1.3 +----------- + +* Added :ref:`run-end-encoded-layout`. + +Version 1.4 +----------- + +* Added :ref:`variable-size-binary-view-layout` and the associated BinaryView + and Utf8View types. +* Added :ref:`listview-layout` and the associated ListView and LargeListView + types. +* Added :ref:`variadic-buffers`. diff --git a/docs/source/status.rst b/docs/source/status.rst index 4bff37c8527fa..9af2fd1921e22 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -21,9 +21,9 @@ Implementation Status The following tables summarize the features available in the various official Arrow libraries. All libraries currently follow version 1.0.0 of the Arrow -format. See :doc:`./format/Versioning` for details about versioning. Unless -otherwise stated, the Python, R, Ruby and C/GLib libraries follow the C++ -Arrow library. +format, or later minor versions that are compatible with version 1.0.0. See +:doc:`./format/Versioning` for details about versioning. Unless otherwise +stated, the Python, R, Ruby and C/GLib libraries follow the C++ Arrow library. Data Types ========== diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go index b4a03410c4fbf..6a45880181043 100644 --- a/go/arrow/array/record.go +++ b/go/arrow/array/record.go @@ -50,7 +50,7 @@ type simpleRecords struct { } // NewRecordReader returns a simple iterator over the given slice of records. -func NewRecordReader(schema *arrow.Schema, recs []arrow.Record) (*simpleRecords, error) { +func NewRecordReader(schema *arrow.Schema, recs []arrow.Record) (RecordReader, error) { rs := &simpleRecords{ refCount: 1, schema: schema, @@ -124,7 +124,7 @@ type simpleRecord struct { // // NewRecord panics if the columns and schema are inconsistent. // NewRecord panics if rows is larger than the height of the columns. -func NewRecord(schema *arrow.Schema, cols []arrow.Array, nrows int64) *simpleRecord { +func NewRecord(schema *arrow.Schema, cols []arrow.Array, nrows int64) arrow.Record { rec := &simpleRecord{ refCount: 1, schema: schema, diff --git a/go/arrow/array/table.go b/go/arrow/array/table.go index 197179b5ca4c3..2e7bb72d77855 100644 --- a/go/arrow/array/table.go +++ b/go/arrow/array/table.go @@ -99,7 +99,7 @@ type simpleTable struct { // // NewTable panics if the columns and schema are inconsistent. // NewTable panics if rows is larger than the height of the columns. 
-func NewTable(schema *arrow.Schema, cols []arrow.Column, rows int64) *simpleTable { +func NewTable(schema *arrow.Schema, cols []arrow.Column, rows int64) arrow.Table { tbl := simpleTable{ refCount: 1, rows: rows, @@ -136,7 +136,7 @@ func NewTable(schema *arrow.Schema, cols []arrow.Column, rows int64) *simpleTabl // - len(schema.Fields) != len(data) // - the total length of each column's array slice (ie: number of rows // in the column) aren't the same for all columns. -func NewTableFromSlice(schema *arrow.Schema, data [][]arrow.Array) *simpleTable { +func NewTableFromSlice(schema *arrow.Schema, data [][]arrow.Array) arrow.Table { if len(data) != schema.NumFields() { panic("array/table: mismatch in number of columns and data for creating a table") } @@ -175,7 +175,7 @@ func NewTableFromSlice(schema *arrow.Schema, data [][]arrow.Array) *simpleTable // NewTableFromRecords returns a new basic, non-lazy in-memory table. // // NewTableFromRecords panics if the records and schema are inconsistent. -func NewTableFromRecords(schema *arrow.Schema, recs []arrow.Record) *simpleTable { +func NewTableFromRecords(schema *arrow.Schema, recs []arrow.Record) arrow.Table { arrs := make([]arrow.Array, len(recs)) cols := make([]arrow.Column, schema.NumFields()) diff --git a/go/arrow/flight/client.go b/go/arrow/flight/client.go index 312c8a76b6f0e..0063e8dccd9cb 100644 --- a/go/arrow/flight/client.go +++ b/go/arrow/flight/client.go @@ -66,9 +66,12 @@ type Client interface { // in order to use the Handshake endpoints of the service. Authenticate(context.Context, ...grpc.CallOption) error AuthenticateBasicToken(ctx context.Context, username string, password string, opts ...grpc.CallOption) (context.Context, error) - CancelFlightInfo(ctx context.Context, request *CancelFlightInfoRequest, opts ...grpc.CallOption) (CancelFlightInfoResult, error) + CancelFlightInfo(ctx context.Context, request *CancelFlightInfoRequest, opts ...grpc.CallOption) (*CancelFlightInfoResult, error) Close() error RenewFlightEndpoint(ctx context.Context, request *RenewFlightEndpointRequest, opts ...grpc.CallOption) (*FlightEndpoint, error) + SetSessionOptions(ctx context.Context, request *SetSessionOptionsRequest, opts ...grpc.CallOption) (*SetSessionOptionsResult, error) + GetSessionOptions(ctx context.Context, request *GetSessionOptionsRequest, opts ...grpc.CallOption) (*GetSessionOptionsResult, error) + CloseSession(ctx context.Context, request *CloseSessionRequest, opts ...grpc.CallOption) (*CloseSessionResult, error) // join the interface from the FlightServiceClient instead of re-defining all // the endpoints here. FlightServiceClient @@ -364,26 +367,14 @@ func ReadUntilEOF(stream FlightService_DoActionClient) error { } } -func (c *client) CancelFlightInfo(ctx context.Context, request *CancelFlightInfoRequest, opts ...grpc.CallOption) (result CancelFlightInfoResult, err error) { - var action flight.Action - action.Type = CancelFlightInfoActionType - action.Body, err = proto.Marshal(request) - if err != nil { - return - } - stream, err := c.DoAction(ctx, &action, opts...) - if err != nil { - return - } - res, err := stream.Recv() +func (c *client) CancelFlightInfo(ctx context.Context, request *CancelFlightInfoRequest, opts ...grpc.CallOption) (*CancelFlightInfoResult, error) { + var result CancelFlightInfoResult + err := handleAction(ctx, c, CancelFlightInfoActionType, request, &result, opts...) 
if err != nil { - return - } - if err = proto.Unmarshal(res.Body, &result); err != nil { - return + return nil, err } - err = ReadUntilEOF(stream) - return + + return &result, err } func (c *client) Close() error { @@ -395,29 +386,68 @@ func (c *client) Close() error { } func (c *client) RenewFlightEndpoint(ctx context.Context, request *RenewFlightEndpointRequest, opts ...grpc.CallOption) (*FlightEndpoint, error) { - var err error - var action flight.Action - action.Type = RenewFlightEndpointActionType - action.Body, err = proto.Marshal(request) + var result FlightEndpoint + err := handleAction(ctx, c, RenewFlightEndpointActionType, request, &result, opts...) if err != nil { return nil, err } - stream, err := c.DoAction(ctx, &action, opts...) + + return &result, err +} + +func (c *client) SetSessionOptions(ctx context.Context, request *SetSessionOptionsRequest, opts ...grpc.CallOption) (*SetSessionOptionsResult, error) { + var result SetSessionOptionsResult + err := handleAction(ctx, c, SetSessionOptionsActionType, request, &result, opts...) if err != nil { return nil, err } - res, err := stream.Recv() + + return &result, err +} + +func (c *client) GetSessionOptions(ctx context.Context, request *GetSessionOptionsRequest, opts ...grpc.CallOption) (*GetSessionOptionsResult, error) { + var result GetSessionOptionsResult + err := handleAction(ctx, c, GetSessionOptionsActionType, request, &result, opts...) if err != nil { return nil, err } - var renewedEndpoint FlightEndpoint - err = proto.Unmarshal(res.Body, &renewedEndpoint) + + return &result, err +} + +func (c *client) CloseSession(ctx context.Context, request *CloseSessionRequest, opts ...grpc.CallOption) (*CloseSessionResult, error) { + var result CloseSessionResult + err := handleAction(ctx, c, CloseSessionActionType, request, &result, opts...) if err != nil { return nil, err } - err = ReadUntilEOF(stream) + + return &result, err +} + +func handleAction[T, U proto.Message](ctx context.Context, client FlightServiceClient, name string, request T, response U, opts ...grpc.CallOption) error { + var ( + action flight.Action + err error + ) + + action.Type = name + action.Body, err = proto.Marshal(request) if err != nil { - return nil, err + return err } - return &renewedEndpoint, nil + stream, err := client.DoAction(ctx, &action, opts...) + if err != nil { + return err + } + res, err := stream.Recv() + if err != nil { + return err + } + err = proto.Unmarshal(res.Body, response) + if err != nil { + return err + } + + return ReadUntilEOF(stream) } diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index 068bfa84c3144..09e4974ae9bfc 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -584,7 +584,7 @@ func (c *Client) CancelQuery(ctx context.Context, info *flight.FlightInfo, opts return } -func (c *Client) CancelFlightInfo(ctx context.Context, request *flight.CancelFlightInfoRequest, opts ...grpc.CallOption) (flight.CancelFlightInfoResult, error) { +func (c *Client) CancelFlightInfo(ctx context.Context, request *flight.CancelFlightInfoRequest, opts ...grpc.CallOption) (*flight.CancelFlightInfoResult, error) { return c.Client.CancelFlightInfo(ctx, request, opts...) } @@ -592,6 +592,18 @@ func (c *Client) RenewFlightEndpoint(ctx context.Context, request *flight.RenewF return c.Client.RenewFlightEndpoint(ctx, request, opts...) 
} +func (c *Client) SetSessionOptions(ctx context.Context, request *flight.SetSessionOptionsRequest, opts ...grpc.CallOption) (*flight.SetSessionOptionsResult, error) { + return c.Client.SetSessionOptions(ctx, request, opts...) +} + +func (c *Client) GetSessionOptions(ctx context.Context, request *flight.GetSessionOptionsRequest, opts ...grpc.CallOption) (*flight.GetSessionOptionsResult, error) { + return c.Client.GetSessionOptions(ctx, request, opts...) +} + +func (c *Client) CloseSession(ctx context.Context, request *flight.CloseSessionRequest, opts ...grpc.CallOption) (*flight.CloseSessionResult, error) { + return c.Client.CloseSession(ctx, request, opts...) +} + func (c *Client) BeginTransaction(ctx context.Context, opts ...grpc.CallOption) (*Txn, error) { request := &pb.ActionBeginTransactionRequest{} action, err := packAction(BeginTransactionActionType, request) diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index f35aeefcf4628..fe4d308f29fc3 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -60,9 +60,9 @@ func (m *FlightServiceClientMock) AuthenticateBasicToken(_ context.Context, user return args.Get(0).(context.Context), args.Error(1) } -func (m *FlightServiceClientMock) CancelFlightInfo(ctx context.Context, request *flight.CancelFlightInfoRequest, opts ...grpc.CallOption) (flight.CancelFlightInfoResult, error) { +func (m *FlightServiceClientMock) CancelFlightInfo(ctx context.Context, request *flight.CancelFlightInfoRequest, opts ...grpc.CallOption) (*flight.CancelFlightInfoResult, error) { args := m.Called(request, opts) - return args.Get(0).(flight.CancelFlightInfoResult), args.Error(1) + return args.Get(0).(*flight.CancelFlightInfoResult), args.Error(1) } func (m *FlightServiceClientMock) RenewFlightEndpoint(ctx context.Context, request *flight.RenewFlightEndpointRequest, opts ...grpc.CallOption) (*flight.FlightEndpoint, error) { @@ -70,6 +70,21 @@ func (m *FlightServiceClientMock) RenewFlightEndpoint(ctx context.Context, reque return args.Get(0).(*flight.FlightEndpoint), args.Error(1) } +func (m *FlightServiceClientMock) SetSessionOptions(ctx context.Context, request *flight.SetSessionOptionsRequest, opts ...grpc.CallOption) (*flight.SetSessionOptionsResult, error) { + args := m.Called(request, opts) + return args.Get(0).(*flight.SetSessionOptionsResult), args.Error(1) +} + +func (m *FlightServiceClientMock) GetSessionOptions(ctx context.Context, request *flight.GetSessionOptionsRequest, opts ...grpc.CallOption) (*flight.GetSessionOptionsResult, error) { + args := m.Called(request, opts) + return args.Get(0).(*flight.GetSessionOptionsResult), args.Error(1) +} + +func (m *FlightServiceClientMock) CloseSession(ctx context.Context, request *flight.CloseSessionRequest, opts ...grpc.CallOption) (*flight.CloseSessionResult, error) { + args := m.Called(request, opts) + return args.Get(0).(*flight.CloseSessionResult), args.Error(1) +} + func (m *FlightServiceClientMock) Close() error { return m.Called().Error(0) } @@ -639,10 +654,10 @@ func (s *FlightSqlClientSuite) TestCancelFlightInfo() { mockedCancelResult := flight.CancelFlightInfoResult{ Status: flight.CancelStatusCancelled, } - s.mockClient.On("CancelFlightInfo", &request, s.callOpts).Return(mockedCancelResult, nil) + s.mockClient.On("CancelFlightInfo", &request, s.callOpts).Return(&mockedCancelResult, nil) cancelResult, err := s.sqlClient.CancelFlightInfo(context.TODO(), &request, s.callOpts...) 
 	s.NoError(err)
-	s.Equal(mockedCancelResult, cancelResult)
+	s.Equal(&mockedCancelResult, cancelResult)
 }
 
 func (s *FlightSqlClientSuite) TestRenewFlightEndpoint() {
@@ -671,7 +686,7 @@ func (s *FlightSqlClientSuite) TestPreparedStatementLoadFromResult() {
 	result := &pb.ActionCreatePreparedStatementResult{
 		PreparedStatementHandle: []byte(query),
 	}
-	
+
 	parameterSchemaResult := arrow.NewSchema([]arrow.Field{{Name: "p_id", Type: arrow.PrimitiveTypes.Int64, Nullable: true}}, nil)
 	result.ParameterSchema = flight.SerializeSchema(parameterSchemaResult, memory.DefaultAllocator)
 	datasetSchemaResult := arrow.NewSchema([]arrow.Field{{Name: "ds_id", Type: arrow.PrimitiveTypes.Int64, Nullable: true}}, nil)
diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go
index 65068048ab3d8..ddd75df381e44 100644
--- a/go/arrow/flight/flightsql/driver/driver.go
+++ b/go/arrow/flight/flightsql/driver/driver.go
@@ -23,6 +23,7 @@ import (
 	"fmt"
 	"io"
 	"sort"
+	"sync"
 	"time"
 
 	"github.com/apache/arrow/go/v16/arrow"
@@ -36,36 +37,77 @@ import (
 	"google.golang.org/grpc/credentials/insecure"
 )
 
+const recordChanBufferSizeDefault = 1
+
 type Rows struct {
-	schema        *arrow.Schema
-	records       []arrow.Record
-	currentRecord int
-	currentRow    int
+	// schema stores the row schema, like column names.
+	schema *arrow.Schema
+	// recordChan enables async reading from the server while the client iterates.
+	recordChan chan arrow.Record
+	// currentRecord stores a record with n>=0 rows.
+	currentRecord arrow.Record
+	// currentRow tracks the position (row) within currentRecord.
+	currentRow uint64
+	// initializedChan prevents the rows from being used before they are properly initialized.
+	initializedChan chan bool
+	// streamError stores the error that interrupted streaming.
+	streamError    error
+	streamErrorMux sync.RWMutex
+	// ctxCancelFunc, when called, cancels the streaming.
+	ctxCancelFunc context.CancelFunc
+}
+
+func newRows() *Rows {
+	return &Rows{
+		recordChan:      make(chan arrow.Record, recordChanBufferSizeDefault),
+		initializedChan: make(chan bool),
+	}
+}
+
+func (r *Rows) setStreamError(err error) {
+	r.streamErrorMux.Lock()
+	defer r.streamErrorMux.Unlock()
+
+	r.streamError = err
+}
+
+func (r *Rows) getStreamError() error {
+	r.streamErrorMux.RLock()
+	defer r.streamErrorMux.RUnlock()
+
+	return r.streamError
 }
 
 // Columns returns the names of the columns.
 func (r *Rows) Columns() []string {
-	if len(r.records) == 0 {
+	if r.schema == nil {
 		return nil
 	}
 
-	// All records have the same columns
-	var cols []string
-	for _, c := range r.schema.Fields() {
-		cols = append(cols, c.Name)
+	// All records have the same columns.
+	cols := make([]string, len(r.schema.Fields()))
+	for i, c := range r.schema.Fields() {
+		cols[i] = c.Name
 	}
 
 	return cols
 }
 
+func (r *Rows) releaseRecord() {
+	if r.currentRecord != nil {
+		r.currentRecord.Release()
+		r.currentRecord = nil
+	}
+}
+
 // Close closes the rows iterator.
 func (r *Rows) Close() error {
-	for _, rec := range r.records {
-		rec.Release()
-	}
-	r.currentRecord = 0
+	r.ctxCancelFunc() // interrupt data streaming.
+	r.currentRow = 0
+	r.releaseRecord()
+
 	return nil
 }
 
@@ -79,28 +121,37 @@ func (r *Rows) Close() error {
 // should be taken when closing Rows not to modify
 // a buffer held in dest.
 func (r *Rows) Next(dest []driver.Value) error {
-	if r.currentRecord >= len(r.records) {
-		return io.EOF
-	}
-	record := r.records[r.currentRecord]
+	if r.currentRecord == nil || int64(r.currentRow) >= r.currentRecord.NumRows() {
+		if err := r.getStreamError(); err != nil {
+			return err
+		}
+
+		r.releaseRecord()
+
+		// Get the next record from the channel.
+		var ok bool
+		if r.currentRecord, ok = <-r.recordChan; !ok {
+			return io.EOF // Channel closed, no more records
+		}
 
-	if int64(r.currentRow) >= record.NumRows() {
-		return ErrOutOfRange
+		r.currentRow = 0
+
+		// Safety double-check.
+		if r.currentRecord == nil || int64(r.currentRow) >= r.currentRecord.NumRows() {
+			return io.EOF // Channel closed, no more records
+		}
 	}
 
-	for i, arr := range record.Columns() {
-		v, err := fromArrowType(arr, r.currentRow)
+	for i, col := range r.currentRecord.Columns() {
+		v, err := fromArrowType(col, int(r.currentRow))
 		if err != nil {
 			return err
 		}
+
 		dest[i] = v
 	}
 
 	r.currentRow++
-	if int64(r.currentRow) >= record.NumRows() {
-		r.currentRecord++
-		r.currentRow = 0
-	}
 
 	return nil
 }
@@ -226,19 +277,14 @@ func (s *Stmt) QueryContext(ctx context.Context, args []driver.NamedValue) (driv
 		return nil, err
 	}
 
-	rows := Rows{}
-	for _, endpoint := range info.Endpoint {
-		schema, records, err := readEndpoint(ctx, s.client, endpoint)
-		if err != nil {
-			return &rows, err
-		}
-		if rows.schema == nil {
-			rows.schema = schema
-		}
-		rows.records = append(rows.records, records...)
-	}
+	rows := newRows()
+	ctx, rows.ctxCancelFunc = context.WithCancel(ctx)
+
+	go rows.streamRecordset(ctx, s.client, info.Endpoint)
 
-	return &rows, nil
+	<-rows.initializedChan // wait for the rows to be properly initialized.
+
+	return rows, nil
 }
 
 func (s *Stmt) setParameters(args []driver.NamedValue) error {
@@ -462,43 +508,70 @@ func (c *Connection) QueryContext(ctx context.Context, query string, args []driv
 		return nil, err
 	}
 
-	rows := Rows{}
-	for _, endpoint := range info.Endpoint {
-		schema, records, err := readEndpoint(ctx, c.client, endpoint)
-		if err != nil {
-			return &rows, err
-		}
-		if rows.schema == nil {
-			rows.schema = schema
-		}
-		rows.records = append(rows.records, records...)
-	}
+	rows := newRows()
+	ctx, rows.ctxCancelFunc = context.WithCancel(ctx)
 
-	return &rows, nil
+	go rows.streamRecordset(ctx, c.client, info.Endpoint)
+
+	<-rows.initializedChan // wait for the rows to be properly initialized.
+
+	return rows, nil
 }
 
-func readEndpoint(ctx context.Context, client *flightsql.Client, endpoint *flight.FlightEndpoint) (*arrow.Schema, []arrow.Record, error) {
-	reader, err := client.DoGet(ctx, endpoint.GetTicket())
-	if err != nil {
-		return nil, nil, fmt.Errorf("getting ticket failed: %w", err)
-	}
-	defer reader.Release()
+func (r *Rows) streamRecordset(ctx context.Context, c *flightsql.Client, endpoints []*flight.FlightEndpoint) {
+	defer close(r.recordChan)
+
+	// initializeOnceOnly ensures r.initializedChan is signaled exactly once, preventing a deadlock.
+	initializeOnceOnly := &sync.Once{}
+
+	defer func() { // in case of error, signal initialization anyway.
+		initializeOnceOnly.Do(func() { r.initializedChan <- true })
+	}()
 
-	schema := reader.Schema()
-	var records []arrow.Record
-	for reader.Next() {
-		if record := reader.Record(); record.NumRows() > 0 {
-			record.Retain()
-			records = append(records, record)
+	// read each endpoint.
+	for _, endpoint := range endpoints {
+		if ctx.Err() != nil {
+			r.setStreamError(fmt.Errorf("recordset streaming interrupted by context error: %w", ctx.Err()))
+			return
 		}
-	}
-	if err := reader.Err(); err != nil && !errors.Is(err, io.EOF) {
-		return nil, nil, err
-	}
 
+		func() { // a closure makes it possible to defer reader.Release() per endpoint.
+			reader, err := c.DoGet(ctx, endpoint.GetTicket())
+			if err != nil {
+				r.setStreamError(fmt.Errorf("getting ticket failed: %w", err))
+				return
+			}
+
+			defer reader.Release()
+
+			r.schema = reader.Schema()
+
+			// read each record into the blocking channel.
+			for reader.Next() {
+				if ctx.Err() != nil {
+					r.setStreamError(fmt.Errorf("recordset streaming interrupted by context error: %w", ctx.Err()))
+					return
+				}
 
-	return schema, records, nil
+				record := reader.Record()
+				record.Retain()
+
+				if record.NumRows() < 1 {
+					record.Release()
+					continue
+				}
+
+				r.recordChan <- record
+
+				go initializeOnceOnly.Do(func() { r.initializedChan <- true })
+			}
+
+			if err := reader.Err(); err != nil && !errors.Is(err, io.EOF) {
+				r.setStreamError(err)
+				return
+			}
+		}()
+	}
 }
 
 // Close invalidates and potentially stops any current
diff --git a/go/arrow/flight/flightsql/driver/driver_test.go b/go/arrow/flight/flightsql/driver/driver_test.go
index 58193626f51dc..79955f6099f8a 100644
--- a/go/arrow/flight/flightsql/driver/driver_test.go
+++ b/go/arrow/flight/flightsql/driver/driver_test.go
@@ -24,6 +24,7 @@ import (
 	"database/sql"
 	"errors"
 	"fmt"
+	"math/rand"
 	"os"
 	"strings"
 	"sync"
@@ -381,6 +382,928 @@ func (s *SqlTestSuite) TestPreparedQuery() {
 	wg.Wait()
 }
 
+// TestRowsManualPrematureClose tests the concurrent rows implementation for closing right after loading.
+// The rows' internal engine is expected to update its status, preventing errors and inconsistent further operations.
+func (s *SqlTestSuite) TestRowsManualPrematureClose() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + + defer s.stopServer(server) + + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + + defer db.Close() + + // Create the table + const tableName = `TestRowsManualPrematureClose` + const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);` + + _, err = db.Exec(ddlCreateTable) + require.NoError(t, err) + + // generate data enough for chunked concurrent test: + const rowCount int = 6000 + const randStringLen = 250 + const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES ` + + gen := rand.New(rand.NewSource(time.Now().UnixNano())) + + var sb strings.Builder + sb.WriteString(sqlInsert) + + for i := 0; i < rowCount; i++ { + sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int())) + } + + insertQuery := strings.TrimSuffix(sb.String(), ",") + + rs, err := db.Exec(insertQuery) + require.NoError(t, err) + + insertedRows, err := rs.RowsAffected() + require.NoError(t, err) + require.Equal(t, int64(rowCount), insertedRows) + + // Do query + const sqlSelectAll = `SELECT id, name, value FROM ` + tableName + + rows, err := db.QueryContext(context.TODO(), sqlSelectAll) + require.NoError(t, err) + require.NotNil(t, rows) + require.NoError(t, rows.Err()) + + // Close Rows normally + require.NoError(t, rows.Close()) + + require.False(t, rows.Next()) + + // Safe double-closing + require.NoError(t, rows.Close()) + + // Columns() should return an error after rows.Close() (sql: Rows are closed) + columns, err := rows.Columns() + require.Error(t, err) + require.Empty(t, columns) + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + +// TestRowsNormalExhaustion tests concurrent rows implementation for normal query/netx/close operation +func (s *SqlTestSuite) TestRowsNormalExhaustion() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + + defer s.stopServer(server) + + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + + defer db.Close() + + // Create the table + const tableName = `TestRowsNormalExhaustion` + const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);` + + _, err = db.Exec(ddlCreateTable) + require.NoError(t, err) + + // generate data enough for chunked concurrent test: + const rowCount int = 6000 + const randStringLen = 250 + const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES ` + + gen := rand.New(rand.NewSource(time.Now().UnixNano())) + + var sb strings.Builder + sb.WriteString(sqlInsert) + + for i := 0; i < rowCount; i++ { + sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int())) + } + + insertQuery := strings.TrimSuffix(sb.String(), ",") + + rs, err := db.Exec(insertQuery) + require.NoError(t, err) + + insertedRows, err := 
rs.RowsAffected() + require.NoError(t, err) + require.Equal(t, int64(rowCount), insertedRows) + + // Do Query + const sqlSelectAll = `SELECT id, name, value FROM ` + tableName + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + rows, err := db.QueryContext(ctx, sqlSelectAll) + require.NoError(t, err) + require.NotNil(t, rows) + require.NoError(t, rows.Err()) + + var ( + actualCount = 0 + xid, + xvalue int + xname string + ) + + for rows.Next() { + require.NoError(t, rows.Scan(&xid, &xname, &xvalue)) + actualCount++ + } + + require.Equal(t, rowCount, actualCount) + require.NoError(t, rows.Close()) + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + +// TestRowsPrematureCloseDuringNextLoop ensures that: +// - closing during Next() loop doesn't trigger concurrency errors. +// - the interation is properly/promptly interrupted. +func (s *SqlTestSuite) TestRowsPrematureCloseDuringNextLoop() { + t := s.T() + + // Create and start the server. + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + + defer s.stopServer(server) + + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + + defer db.Close() + + // Create the table. + const tableName = `TestRowsPrematureCloseDuringNextLoop` + const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);` + + _, err = db.Exec(ddlCreateTable) + require.NoError(t, err) + + // generate data enough for chunked concurrent test: + const rowCount = 6000 + const randStringLen = 250 + const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES ` + + gen := rand.New(rand.NewSource(time.Now().UnixNano())) + + var sb strings.Builder + sb.WriteString(sqlInsert) + + for i := 0; i < rowCount; i++ { + sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int())) + } + + insertQuery := strings.TrimSuffix(sb.String(), ",") + + rs, err := db.Exec(insertQuery) + require.NoError(t, err) + + insertedRows, err := rs.RowsAffected() + require.NoError(t, err) + require.Equal(t, int64(rowCount), insertedRows) + + // Do query + const sqlSelectAll = `SELECT id, name, value FROM ` + tableName + + rows, err := db.QueryContext(context.TODO(), sqlSelectAll) + require.NoError(t, err) + require.NotNil(t, rows) + require.NoError(t, rows.Err()) + + const closeAfterNRows = 10 + var ( + i, + xid, + xvalue int + xname string + ) + + for rows.Next() { + err = rows.Scan(&xid, &xname, &xvalue) + require.NoError(t, err) + + i++ + if i >= closeAfterNRows { + require.NoError(t, rows.Close()) + } + } + + require.Equal(t, closeAfterNRows, i) + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + +// TestRowsInterruptionByContextManualCancellation cancels the context before it starts retrieving rows.Next(). +// it gives time for cancellation propagation, and ensures that no further data was retrieved. 
+func (s *SqlTestSuite) TestRowsInterruptionByContextManualCancellation() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + + defer s.stopServer(server) + + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + + defer db.Close() + + // Create the table + const tableName = `TestRowsInterruptionByContextManualCancellation` + const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);` + + _, err = db.Exec(ddlCreateTable) + require.NoError(t, err) + + // generate data enough for chunked concurrent test: + const rowCount = 6000 + const randStringLen = 250 + const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES ` + + gen := rand.New(rand.NewSource(time.Now().UnixNano())) + + var sb strings.Builder + sb.WriteString(sqlInsert) + + for i := 0; i < rowCount; i++ { + sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int())) + } + + insertQuery := strings.TrimSuffix(sb.String(), ",") + + rs, err := db.Exec(insertQuery) + require.NoError(t, err) + + insertedRows, err := rs.RowsAffected() + require.NoError(t, err) + require.Equal(t, int64(rowCount), insertedRows) + + // Do query + const sqlSelectAll = `SELECT id, name, value FROM ` + tableName + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + rows, err := db.QueryContext(ctx, sqlSelectAll) + require.NoError(t, err) + require.NotNil(t, rows) + require.NoError(t, rows.Err()) + + defer rows.Close() + + go cancel() + + time.Sleep(100 * time.Millisecond) + + count := 0 + for rows.Next() { + count++ + } + + require.Zero(t, count) + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + +// TestRowsInterruptionByContextTimeout forces a timeout, and ensures no further data is retrieved after that. 
+func (s *SqlTestSuite) TestRowsInterruptionByContextTimeout() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + + defer s.stopServer(server) + + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + + defer db.Close() + + // Create the table + const tableName = `TestRowsInterruptionByContextTimeout` + const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);` + + _, err = db.Exec(ddlCreateTable) + require.NoError(t, err) + + // generate data enough for chunked concurrent test: + const rowCount = 6000 + const randStringLen = 250 + const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES ` + + gen := rand.New(rand.NewSource(time.Now().UnixNano())) + + var sb strings.Builder + sb.WriteString(sqlInsert) + + for i := 0; i < rowCount; i++ { + sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int())) + } + + insertQuery := strings.TrimSuffix(sb.String(), ",") + + rs, err := db.Exec(insertQuery) + require.NoError(t, err) + + insertedRows, err := rs.RowsAffected() + require.NoError(t, err) + require.Equal(t, int64(rowCount), insertedRows) + + // Do query + const ( + timeout = 1500 * time.Millisecond + sqlSelectAll = `SELECT id, name, value FROM ` + tableName + ) + + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + rows, err := db.QueryContext(ctx, sqlSelectAll) + require.NoError(t, err) + require.NotNil(t, rows) + require.NoError(t, rows.Err()) + + defer rows.Close() + + // eventually, after time.Sleep(), the context will be cancelled. + // then, rows.Next() should return false, and <-ctx.Done() will never be tested. + for rows.Next() { + select { + case <-ctx.Done(): + t.Fatal("cancellation didn't prevent more records to be read") + default: + time.Sleep(time.Second) + } + } + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + +// TestRowsManualPrematureCloseStmt tests concurrent rows implementation for closing right after loading. +// Is expected that rows' internal engine update its status, preventing errors and inconsistent further operations. 
+func (s *SqlTestSuite) TestRowsManualPrematureCloseStmt() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + + defer s.stopServer(server) + + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + + defer db.Close() + + // Create the table + const tableName = `TestRowsManualPrematureCloseStmt` + const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);` + + _, err = db.Exec(ddlCreateTable) + require.NoError(t, err) + + // generate data enough for chunked concurrent test: + const rowCount int = 6000 + const randStringLen = 250 + const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES ` + + gen := rand.New(rand.NewSource(time.Now().UnixNano())) + + var sb strings.Builder + sb.WriteString(sqlInsert) + + for i := 0; i < rowCount; i++ { + sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int())) + } + + insertQuery := strings.TrimSuffix(sb.String(), ",") + + rs, err := db.Exec(insertQuery) + require.NoError(t, err) + + insertedRows, err := rs.RowsAffected() + require.NoError(t, err) + require.Equal(t, int64(rowCount), insertedRows) + + // Do query + const sqlSelectAll = `SELECT id, name, value FROM ` + tableName + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + stmt, err := db.PrepareContext(ctx, sqlSelectAll) + require.NoError(t, err) + + rows, err := stmt.QueryContext(ctx) + require.NoError(t, err) + require.NotNil(t, rows) + require.NoError(t, rows.Err()) + + // Close Rows normally + require.NoError(t, rows.Close()) + + require.False(t, rows.Next()) + + // Safe double-closing + require.NoError(t, rows.Close()) + + // Columns() should return an error after rows.Close() (sql: Rows are closed) + columns, err := rows.Columns() + require.Error(t, err) + require.Empty(t, columns) + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + +// TestRowsNormalExhaustionStmt tests concurrent rows implementation for normal query/netx/close operation +func (s *SqlTestSuite) TestRowsNormalExhaustionStmt() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + + defer s.stopServer(server) + + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + + defer db.Close() + + // Create the table + const tableName = `TestRowsNormalExhaustionStmt` + const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);` + + _, err = db.Exec(ddlCreateTable) + require.NoError(t, err) + + // generate data enough for chunked concurrent test: + const rowCount int = 6000 + const randStringLen = 250 + const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES ` + + gen := rand.New(rand.NewSource(time.Now().UnixNano())) + + var sb strings.Builder + sb.WriteString(sqlInsert) + + for i := 0; i < rowCount; i++ { + sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), 
+
+    insertQuery := strings.TrimSuffix(sb.String(), ",")
+
+    rs, err := db.Exec(insertQuery)
+    require.NoError(t, err)
+
+    insertedRows, err := rs.RowsAffected()
+    require.NoError(t, err)
+    require.Equal(t, int64(rowCount), insertedRows)
+
+    // Do query
+    const sqlSelectAll = `SELECT id, name, value FROM ` + tableName
+
+    ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+    defer cancel()
+
+    stmt, err := db.PrepareContext(ctx, sqlSelectAll)
+    require.NoError(t, err)
+
+    rows, err := stmt.QueryContext(ctx)
+    require.NoError(t, err)
+    require.NotNil(t, rows)
+    require.NoError(t, rows.Err())
+
+    var (
+        actualCount = 0
+        xid,
+        xvalue int
+        xname  string
+    )
+
+    for rows.Next() {
+        require.NoError(t, rows.Scan(&xid, &xname, &xvalue))
+        actualCount++
+    }
+
+    require.Equal(t, rowCount, actualCount)
+    require.NoError(t, rows.Close())
+
+    // Tear-down server
+    s.stopServer(server)
+    wg.Wait()
+}
+
+// TestRowsPrematureCloseDuringNextLoopStmt ensures that:
+// - closing during a Next() loop doesn't trigger concurrency errors.
+// - the iteration is properly and promptly interrupted.
+func (s *SqlTestSuite) TestRowsPrematureCloseDuringNextLoopStmt() {
+    t := s.T()
+
+    // Create and start the server.
+    server, addr, err := s.createServer()
+    require.NoError(t, err)
+
+    var wg sync.WaitGroup
+    wg.Add(1)
+
+    go func() {
+        defer wg.Done()
+        require.NoError(s.T(), s.startServer(server))
+    }()
+
+    defer s.stopServer(server)
+
+    time.Sleep(100 * time.Millisecond)
+
+    // Configure client
+    cfg := s.Config
+    cfg.Address = addr
+
+    db, err := sql.Open("flightsql", cfg.DSN())
+    require.NoError(t, err)
+
+    defer db.Close()
+
+    // Create the table.
+    const tableName = `TestRowsPrematureCloseDuringNextLoopStmt`
+    const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);`
+
+    _, err = db.Exec(ddlCreateTable)
+    require.NoError(t, err)
+
+    // Generate enough data for the chunked, concurrent test:
+    const rowCount = 6000
+    const randStringLen = 250
+    const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES `
+
+    gen := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+    var sb strings.Builder
+    sb.WriteString(sqlInsert)
+
+    for i := 0; i < rowCount; i++ {
+        sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int()))
+    }
+
+    insertQuery := strings.TrimSuffix(sb.String(), ",")
+
+    rs, err := db.Exec(insertQuery)
+    require.NoError(t, err)
+
+    insertedRows, err := rs.RowsAffected()
+    require.NoError(t, err)
+    require.Equal(t, int64(rowCount), insertedRows)
+
+    // Do query
+    const sqlSelectAll = `SELECT id, name, value FROM ` + tableName
+
+    ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+    defer cancel()
+
+    stmt, err := db.PrepareContext(ctx, sqlSelectAll)
+    require.NoError(t, err)
+
+    rows, err := stmt.QueryContext(ctx)
+
+    require.NoError(t, err)
+    require.NotNil(t, rows)
+    require.NoError(t, rows.Err())
+
+    const closeAfterNRows = 10
+    var (
+        i,
+        xid,
+        xvalue int
+        xname  string
+    )
+
+    for rows.Next() {
+        err = rows.Scan(&xid, &xname, &xvalue)
+        require.NoError(t, err)
+
+        i++
+        if i >= closeAfterNRows {
+            require.NoError(t, rows.Close())
+        }
+    }
+
+    require.Equal(t, closeAfterNRows, i)
+
+    // Tear-down server
+    s.stopServer(server)
+    wg.Wait()
+}
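The premature-close tests above and below rely on another piece of the database/sql contract: Rows.Close is idempotent and safe to call mid-iteration, after which Next reports false. A small sketch under that assumption (drainAtMost is a hypothetical helper, not part of this patch):

package example

import "database/sql"

// drainAtMost reads up to limit rows, then closes the result set early.
// Close is safe mid-iteration and idempotent; afterwards Next reports
// false, which ends the loop promptly. A deliberate early Close leaves
// rows.Err() == nil.
func drainAtMost(rows *sql.Rows, limit int) (int, error) {
	read := 0
	for rows.Next() {
		read++
		if read >= limit {
			if err := rows.Close(); err != nil {
				return read, err
			}
		}
	}
	return read, rows.Err()
}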
+
+// TestRowsInterruptionByContextManualCancellationStmt cancels the context before it starts retrieving rows via rows.Next().
+// It allows time for the cancellation to propagate, then ensures that no further data is retrieved.
+func (s *SqlTestSuite) TestRowsInterruptionByContextManualCancellationStmt() {
+    t := s.T()
+
+    // Create and start the server
+    server, addr, err := s.createServer()
+    require.NoError(t, err)
+
+    var wg sync.WaitGroup
+    wg.Add(1)
+
+    go func() {
+        defer wg.Done()
+        require.NoError(s.T(), s.startServer(server))
+    }()
+
+    defer s.stopServer(server)
+
+    time.Sleep(100 * time.Millisecond)
+
+    // Configure client
+    cfg := s.Config
+    cfg.Address = addr
+
+    db, err := sql.Open("flightsql", cfg.DSN())
+    require.NoError(t, err)
+
+    defer db.Close()
+
+    // Create the table
+    const tableName = `TestRowsInterruptionByContextManualCancellationStmt`
+    const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);`
+
+    _, err = db.Exec(ddlCreateTable)
+    require.NoError(t, err)
+
+    // Generate enough data for the chunked, concurrent test:
+    const rowCount = 6000
+    const randStringLen = 250
+    const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES `
+
+    gen := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+    var sb strings.Builder
+    sb.WriteString(sqlInsert)
+
+    for i := 0; i < rowCount; i++ {
+        sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int()))
+    }
+
+    insertQuery := strings.TrimSuffix(sb.String(), ",")
+
+    rs, err := db.Exec(insertQuery)
+    require.NoError(t, err)
+
+    insertedRows, err := rs.RowsAffected()
+    require.NoError(t, err)
+    require.Equal(t, int64(rowCount), insertedRows)
+
+    // Do query
+    const sqlSelectAll = `SELECT id, name, value FROM ` + tableName
+
+    ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+    defer cancel()
+
+    stmt, err := db.PrepareContext(ctx, sqlSelectAll)
+    require.NoError(t, err)
+
+    rows, err := stmt.QueryContext(ctx)
+    require.NoError(t, err)
+    require.NotNil(t, rows)
+    require.NoError(t, rows.Err())
+
+    defer rows.Close()
+
+    go cancel()
+
+    time.Sleep(100 * time.Millisecond)
+
+    count := 0
+    for rows.Next() {
+        count++
+    }
+
+    require.Zero(t, count)
+
+    // Tear-down server
+    s.stopServer(server)
+    wg.Wait()
+}
+
+// TestRowsInterruptionByContextTimeoutStmt forces a timeout and ensures no further data is retrieved after that.
+func (s *SqlTestSuite) TestRowsInterruptionByContextTimeoutStmt() {
+    t := s.T()
+
+    // Create and start the server
+    server, addr, err := s.createServer()
+    require.NoError(t, err)
+
+    var wg sync.WaitGroup
+    wg.Add(1)
+
+    go func() {
+        defer wg.Done()
+        require.NoError(s.T(), s.startServer(server))
+    }()
+
+    defer s.stopServer(server)
+
+    time.Sleep(100 * time.Millisecond)
+
+    // Configure client
+    cfg := s.Config
+    cfg.Address = addr
+
+    db, err := sql.Open("flightsql", cfg.DSN())
+    require.NoError(t, err)
+
+    defer db.Close()
+
+    // Create the table
+    const tableName = `TestRowsInterruptionByContextTimeoutStmt`
+    const ddlCreateTable = `CREATE TABLE ` + tableName + ` (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(300), value INT);`
+
+    _, err = db.Exec(ddlCreateTable)
+    require.NoError(t, err)
+
+    // Generate enough data for the chunked, concurrent test:
+    const rowCount = 6000
+    const randStringLen = 250
+    const sqlInsert = `INSERT INTO ` + tableName + ` (name,value) VALUES `
+
+    gen := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+    var sb strings.Builder
+    sb.WriteString(sqlInsert)
+
+    for i := 0; i < rowCount; i++ {
+        sb.WriteString(fmt.Sprintf(`('%s', %d),`, getRandomString(gen, randStringLen), gen.Int()))
+    }
+
+    insertQuery := strings.TrimSuffix(sb.String(), ",")
+
+    rs, err := db.Exec(insertQuery)
+    require.NoError(t, err)
+
+    insertedRows, err := rs.RowsAffected()
+    require.NoError(t, err)
+    require.Equal(t, int64(rowCount), insertedRows)
+
+    // Do query
+    const (
+        timeout      = 1500 * time.Millisecond
+        sqlSelectAll = `SELECT id, name, value FROM ` + tableName
+    )
+
+    ctx, cancel := context.WithTimeout(context.Background(), timeout)
+    defer cancel()
+
+    stmt, err := db.PrepareContext(ctx, sqlSelectAll)
+    require.NoError(t, err)
+
+    rows, err := stmt.QueryContext(ctx)
+    require.NoError(t, err)
+    require.NotNil(t, rows)
+    require.NoError(t, rows.Err())
+
+    defer rows.Close()
+
+    // Eventually, after the time.Sleep() below, the context times out.
+    // rows.Next() should then return false, so the <-ctx.Done() branch is never taken.
+    for rows.Next() {
+        select {
+        case <-ctx.Done():
+            t.Fatal("cancellation didn't prevent more records from being read")
+        default:
+            time.Sleep(time.Second)
+        }
+    }
+
+    // Tear-down server
+    s.stopServer(server)
+    wg.Wait()
+}
+
 func (s *SqlTestSuite) TestPreparedQueryWithConstraint() {
 	t := s.T()
 
@@ -918,3 +1841,17 @@ func (s *MockServer) GetFlightInfoStatement(_ context.Context, query flightsql.S
 		TotalBytes: -1,
 	}, nil
 }
+
+const getRandomStringCharset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789. "
" + +var getRandomStringCharsetLen = len(getRandomStringCharset) + +func getRandomString(gen *rand.Rand, length int) string { + result := make([]byte, length) + + for i := range result { + result[i] = getRandomStringCharset[rand.Intn(getRandomStringCharsetLen)] + } + + return string(result) +} diff --git a/go/arrow/flight/flightsql/server.go b/go/arrow/flight/flightsql/server.go index b825f121f3a16..7bc15ab4295f1 100644 --- a/go/arrow/flight/flightsql/server.go +++ b/go/arrow/flight/flightsql/server.go @@ -548,6 +548,18 @@ func (BaseServer) EndSavepoint(context.Context, ActionEndSavepointRequest) error return status.Error(codes.Unimplemented, "EndSavepoint not implemented") } +func (BaseServer) SetSessionOptions(context.Context, *flight.SetSessionOptionsRequest) (*flight.SetSessionOptionsResult, error) { + return nil, status.Error(codes.Unimplemented, "SetSessionOptions not implemented") +} + +func (BaseServer) GetSessionOptions(context.Context, *flight.GetSessionOptionsRequest) (*flight.GetSessionOptionsResult, error) { + return nil, status.Error(codes.Unimplemented, "GetSessionOptions not implemented") +} + +func (BaseServer) CloseSession(context.Context, *flight.CloseSessionRequest) (*flight.CloseSessionResult, error) { + return nil, status.Error(codes.Unimplemented, "CloseSession not implemented") +} + // Server is the required interface for a FlightSQL server. It is implemented by // BaseServer which must be embedded in any implementation. The default // implementation by BaseServer for each of these (except GetSqlInfo) @@ -676,6 +688,12 @@ type Server interface { PollFlightInfoSubstraitPlan(context.Context, StatementSubstraitPlan, *flight.FlightDescriptor) (*flight.PollInfo, error) // PollFlightInfoPreparedStatement handles polling for query execution. PollFlightInfoPreparedStatement(context.Context, PreparedStatementQuery, *flight.FlightDescriptor) (*flight.PollInfo, error) + // SetSessionOptions sets option(s) for the current server session. + SetSessionOptions(context.Context, *flight.SetSessionOptionsRequest) (*flight.SetSessionOptionsResult, error) + // GetSessionOptions gets option(s) for the current server session. + GetSessionOptions(context.Context, *flight.GetSessionOptionsRequest) (*flight.GetSessionOptionsResult, error) + // CloseSession closes/invalidates the current server session. 
@@ -676,6 +688,12 @@ type Server interface {
 	PollFlightInfoSubstraitPlan(context.Context, StatementSubstraitPlan, *flight.FlightDescriptor) (*flight.PollInfo, error)
 	// PollFlightInfoPreparedStatement handles polling for query execution.
 	PollFlightInfoPreparedStatement(context.Context, PreparedStatementQuery, *flight.FlightDescriptor) (*flight.PollInfo, error)
+    // SetSessionOptions sets option(s) for the current server session.
+    SetSessionOptions(context.Context, *flight.SetSessionOptionsRequest) (*flight.SetSessionOptionsResult, error)
+    // GetSessionOptions gets option(s) for the current server session.
+    GetSessionOptions(context.Context, *flight.GetSessionOptionsRequest) (*flight.GetSessionOptionsResult, error)
+    // CloseSession closes/invalidates the current server session.
+    CloseSession(context.Context, *flight.CloseSessionRequest) (*flight.CloseSessionResult, error)
 
 	mustEmbedBaseServer()
 }
@@ -1262,6 +1280,69 @@ func (f *flightSqlServer) DoAction(cmd *flight.Action, stream flight.FlightServi
 		}
 
 		return stream.Send(&pb.Result{})
+    case flight.SetSessionOptionsActionType:
+        var (
+            request flight.SetSessionOptionsRequest
+            err     error
+        )
+
+        if err = proto.Unmarshal(cmd.Body, &request); err != nil {
+            return status.Errorf(codes.InvalidArgument, "unable to unmarshal SetSessionOptionsRequest: %s", err.Error())
+        }
+
+        response, err := f.srv.SetSessionOptions(stream.Context(), &request)
+        if err != nil {
+            return err
+        }
+
+        out := &pb.Result{}
+        out.Body, err = proto.Marshal(response)
+        if err != nil {
+            return err
+        }
+        return stream.Send(out)
+    case flight.GetSessionOptionsActionType:
+        var (
+            request flight.GetSessionOptionsRequest
+            err     error
+        )
+
+        if err = proto.Unmarshal(cmd.Body, &request); err != nil {
+            return status.Errorf(codes.InvalidArgument, "unable to unmarshal GetSessionOptionsRequest: %s", err.Error())
+        }
+
+        response, err := f.srv.GetSessionOptions(stream.Context(), &request)
+        if err != nil {
+            return err
+        }
+
+        out := &pb.Result{}
+        out.Body, err = proto.Marshal(response)
+        if err != nil {
+            return err
+        }
+        return stream.Send(out)
+    case flight.CloseSessionActionType:
+        var (
+            request flight.CloseSessionRequest
+            err     error
+        )
+
+        if err = proto.Unmarshal(cmd.Body, &request); err != nil {
+            return status.Errorf(codes.InvalidArgument, "unable to unmarshal CloseSessionRequest: %s", err.Error())
+        }
+
+        response, err := f.srv.CloseSession(stream.Context(), &request)
+        if err != nil {
+            return err
+        }
+
+        out := &pb.Result{}
+        out.Body, err = proto.Marshal(response)
+        if err != nil {
+            return err
+        }
+        return stream.Send(out)
 	default:
 		return status.Error(codes.InvalidArgument, "the defined request is invalid.")
 	}
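All three new DoAction cases share one envelope: Action.Body carries the serialized request and a single streamed Result carries the serialized response. As a hedged sketch, this is what driving one of the actions looks like from a plain Flight client; the flightsql.Client helpers used in the tests below wrap exactly this (setSessionOptionsRaw is illustrative, not part of this patch):

package example

import (
	"context"

	"github.com/apache/arrow/go/v16/arrow/flight"
	"google.golang.org/protobuf/proto"
)

// setSessionOptionsRaw shows the wire-level shape of the new actions:
// marshal the request into Action.Body, read back one Result, and
// unmarshal its Body into the matching response type.
func setSessionOptionsRaw(ctx context.Context, cl flight.Client, req *flight.SetSessionOptionsRequest) (*flight.SetSessionOptionsResult, error) {
	body, err := proto.Marshal(req)
	if err != nil {
		return nil, err
	}

	stream, err := cl.DoAction(ctx, &flight.Action{
		Type: flight.SetSessionOptionsActionType,
		Body: body,
	})
	if err != nil {
		return nil, err
	}

	res, err := stream.Recv() // the handlers above send a single Result
	if err != nil {
		return nil, err
	}

	var out flight.SetSessionOptionsResult
	if err := proto.Unmarshal(res.GetBody(), &out); err != nil {
		return nil, err
	}
	return &out, nil
}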
diff --git a/go/arrow/flight/flightsql/server_test.go b/go/arrow/flight/flightsql/server_test.go
index 22bbe3f8154b2..df619e7a24140 100644
--- a/go/arrow/flight/flightsql/server_test.go
+++ b/go/arrow/flight/flightsql/server_test.go
@@ -27,11 +27,14 @@ import (
 	"github.com/apache/arrow/go/v16/arrow/flight"
 	"github.com/apache/arrow/go/v16/arrow/flight/flightsql"
 	pb "github.com/apache/arrow/go/v16/arrow/flight/gen/flight"
+    "github.com/apache/arrow/go/v16/arrow/flight/session"
 	"github.com/apache/arrow/go/v16/arrow/memory"
+    "github.com/stretchr/testify/require"
 	"github.com/stretchr/testify/suite"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/credentials/insecure"
+    "google.golang.org/grpc/metadata"
 	"google.golang.org/grpc/status"
 	"google.golang.org/protobuf/proto"
 	"google.golang.org/protobuf/types/known/anypb"
@@ -130,6 +133,51 @@ func (*testServer) DoGetStatement(ctx context.Context, ticket flightsql.Statemen
 	return
 }
 
+func (*testServer) SetSessionOptions(ctx context.Context, req *flight.SetSessionOptionsRequest) (*flight.SetSessionOptionsResult, error) {
+    session, err := session.GetSessionFromContext(ctx)
+    if err != nil {
+        return nil, err
+    }
+
+    errors := make(map[string]*flight.SetSessionOptionsResultError)
+    for key, val := range req.GetSessionOptions() {
+        if key == "lol_invalid" {
+            errors[key] = &flight.SetSessionOptionsResultError{Value: flight.SetSessionOptionsResultErrorInvalidName}
+            continue
+        }
+        if val.GetStringValue() == "lol_invalid" {
+            errors[key] = &flight.SetSessionOptionsResultError{Value: flight.SetSessionOptionsResultErrorInvalidValue}
+            continue
+        }
+
+        session.SetSessionOption(key, val)
+    }
+
+    return &flight.SetSessionOptionsResult{Errors: errors}, nil
+}
+
+func (*testServer) GetSessionOptions(ctx context.Context, req *flight.GetSessionOptionsRequest) (*flight.GetSessionOptionsResult, error) {
+    session, err := session.GetSessionFromContext(ctx)
+    if err != nil {
+        return nil, err
+    }
+
+    return &flight.GetSessionOptionsResult{SessionOptions: session.GetSessionOptions()}, nil
+}
+
+func (*testServer) CloseSession(ctx context.Context, req *flight.CloseSessionRequest) (*flight.CloseSessionResult, error) {
+    session, err := session.GetSessionFromContext(ctx)
+    if err != nil {
+        return nil, err
+    }
+
+    if err = session.Close(); err != nil {
+        return nil, err
+    }
+
+    return &flight.CloseSessionResult{Status: flight.CloseSessionResultClosed}, nil
+}
+
 type FlightSqlServerSuite struct {
 	suite.Suite
 
@@ -423,8 +471,7 @@ func (s *UnimplementedFlightSqlServerSuite) TestDoAction() {
 func (s *UnimplementedFlightSqlServerSuite) TestCancelFlightInfo() {
 	request := flight.CancelFlightInfoRequest{}
 	result, err := s.cl.CancelFlightInfo(context.TODO(), &request)
-	s.Equal(flight.CancelFlightInfoResult{Status: flight.CancelStatusUnspecified},
-		result)
+    s.Nil(result)
 	st, ok := status.FromError(err)
 	s.True(ok)
 	s.Equal(codes.Unimplemented, st.Code())
@@ -442,7 +489,403 @@ func (s *UnimplementedFlightSqlServerSuite) TestRenewFlightEndpoint() {
 	s.Equal("RenewFlightEndpoint not implemented", st.Message())
 }
 
+func (s *UnimplementedFlightSqlServerSuite) TestSetSessionOptions() {
+    opts, err := flight.NewSessionOptionValues(map[string]any{
+        "key": "val",
+    })
+    s.NoError(err)
+    res, err := s.cl.SetSessionOptions(context.TODO(), &flight.SetSessionOptionsRequest{SessionOptions: opts})
+    s.Nil(res)
+    st, ok := status.FromError(err)
+    s.True(ok)
+    s.Equal(codes.Unimplemented, st.Code())
+    s.Equal("SetSessionOptions not implemented", st.Message())
+}
+
+func (s *UnimplementedFlightSqlServerSuite) TestGetSessionOptions() {
+    res, err := s.cl.GetSessionOptions(context.TODO(), &flight.GetSessionOptionsRequest{})
+    s.Nil(res)
+    st, ok := status.FromError(err)
+    s.True(ok)
+    s.Equal(codes.Unimplemented, st.Code())
+    s.Equal("GetSessionOptions not implemented", st.Message())
+}
+
+func (s *UnimplementedFlightSqlServerSuite) TestCloseSession() {
+    res, err := s.cl.CloseSession(context.TODO(), &flight.CloseSessionRequest{})
+    s.Nil(res)
+    st, ok := status.FromError(err)
+    s.True(ok)
+    s.Equal(codes.Unimplemented, st.Code())
+    s.Equal("CloseSession not implemented", st.Message())
+}
+
+type FlightSqlServerSessionSuite struct {
+    suite.Suite
+
+    s  flight.Server
+    cl *flightsql.Client
+
+    sessionManager session.ServerSessionManager
+}
+
+func (s *FlightSqlServerSessionSuite) SetupSuite() {
+    s.s = flight.NewServerWithMiddleware([]flight.ServerMiddleware{
+        flight.CreateServerMiddleware(session.NewServerSessionMiddleware(s.sessionManager)),
+    })
+    srv := flightsql.NewFlightServer(&testServer{})
+    s.s.RegisterFlightService(srv)
+    s.s.Init("localhost:0")
+
+    go s.s.Serve()
+}
+
+func (s *FlightSqlServerSessionSuite) TearDownSuite() {
+    s.s.Shutdown()
+}
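The suite below runs once per session manager, and the two managers trade off where the session state lives. A brief sketch of that choice, using only constructors that appear in this diff (newSessionMiddleware and newIDs are illustrative placeholders):

package example

import (
	"github.com/apache/arrow/go/v16/arrow/flight"
	"github.com/apache/arrow/go/v16/arrow/flight/session"
)

// newSessionMiddleware contrasts the two interchangeable managers.
// newIDs is a placeholder func() string supplying session IDs.
func newSessionMiddleware(stateless bool, newIDs func() string) flight.ServerMiddleware {
	var manager session.ServerSessionManager
	if stateless {
		// The cookie itself carries the serialized session state
		// (arrow_flight_session=<base64 proto>); no server-side store.
		manager = session.NewStatelessServerSessionManager()
	} else {
		// The server stores sessions keyed by ID; the cookie carries only
		// the ID (arrow_flight_session_id=...). Factory and store are
		// optional overrides, shown in TestStatefulServerSessionCookies.
		manager = session.NewStatefulServerSessionManager(
			session.WithFactory(session.NewSessionFactory(newIDs)),
			session.WithStore(session.NewSessionStore()),
		)
	}
	return flight.CreateServerMiddleware(session.NewServerSessionMiddleware(manager))
}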
+
+func (s *FlightSqlServerSessionSuite) SetupTest() {
+    middleware := []flight.ClientMiddleware{
+        flight.NewClientCookieMiddleware(),
+    }
+    cl, err := flightsql.NewClient(s.s.Addr().String(), nil, middleware, dialOpts...)
+    s.Require().NoError(err)
+    s.cl = cl
+}
+
+func (s *FlightSqlServerSessionSuite) TearDownTest() {
+    s.Require().NoError(s.cl.Close())
+    s.cl = nil
+}
+
+func (s *FlightSqlServerSessionSuite) TestSetSessionOptions() {
+    opts, err := flight.NewSessionOptionValues(map[string]any{
+        "foolong":                int64(123),
+        "bardouble":              456.0,
+        "lol_invalid":            "this won't get set",
+        "key_with_invalid_value": "lol_invalid",
+        "big_ol_string_list":     []string{"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"},
+    })
+    s.NoError(err)
+    res, err := s.cl.SetSessionOptions(context.TODO(), &flight.SetSessionOptionsRequest{SessionOptions: opts})
+    s.NoError(err)
+    s.NotNil(res)
+
+    expectedErrs := map[string]*flight.SetSessionOptionsResultError{
+        "lol_invalid":            {Value: flight.SetSessionOptionsResultErrorInvalidName},
+        "key_with_invalid_value": {Value: flight.SetSessionOptionsResultErrorInvalidValue},
+    }
+
+    errs := res.GetErrors()
+    s.Equal(len(expectedErrs), len(errs))
+
+    for key, val := range errs {
+        s.Equal(expectedErrs[key], val)
+    }
+}
+
+func (s *FlightSqlServerSessionSuite) TestGetSetGetSessionOptions() {
+    ctx := context.TODO()
+    getRes, err := s.cl.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{})
+    s.NoError(err)
+    s.NotNil(getRes)
+    s.Len(getRes.SessionOptions, 0)
+
+    expectedOpts := map[string]any{
+        "foolong":            int64(123),
+        "bardouble":          456.0,
+        "big_ol_string_list": []string{"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"},
+    }
+
+    optionVals, err := flight.NewSessionOptionValues(expectedOpts)
+    s.NoError(err)
+    s.NotNil(optionVals)
+
+    setRes, err := s.cl.SetSessionOptions(ctx, &flight.SetSessionOptionsRequest{SessionOptions: optionVals})
+    s.NoError(err)
+    s.NotNil(setRes)
+    s.Empty(setRes.Errors)
+
+    getRes2, err := s.cl.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{})
+    s.NoError(err)
+    s.NotNil(getRes2)
+
+    opts := getRes2.GetSessionOptions()
+    s.Equal(3, len(opts))
+
+    s.Equal(expectedOpts["foolong"], opts["foolong"].GetInt64Value())
+    s.Equal(expectedOpts["bardouble"], opts["bardouble"].GetDoubleValue())
+    s.Equal(expectedOpts["big_ol_string_list"], opts["big_ol_string_list"].GetStringListValue().GetValues())
+}
+
+func (s *FlightSqlServerSessionSuite) TestSetRemoveSessionOptions() {
+    ctx := context.TODO()
+    initialOpts := map[string]any{
+        "foolong":            int64(123),
+        "bardouble":          456.0,
+        "big_ol_string_list": []string{"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"},
+    }
+
+    optionVals, err := flight.NewSessionOptionValues(initialOpts)
+    s.NoError(err)
+    s.NotNil(optionVals)
+
+    setRes, err := s.cl.SetSessionOptions(ctx, &flight.SetSessionOptionsRequest{SessionOptions: optionVals})
+    s.NoError(err)
+    s.NotNil(setRes)
+    s.Empty(setRes.Errors)
+
+    removeKeyOpts, err := flight.NewSessionOptionValues(map[string]any{
+        "foolong": nil,
+    })
+    s.NoError(err)
+    s.NotNil(removeKeyOpts)
+
+    setRes2, err := s.cl.SetSessionOptions(ctx, &flight.SetSessionOptionsRequest{SessionOptions: removeKeyOpts})
+    s.NoError(err)
+    s.NotNil(setRes2)
+    s.Empty(setRes2.Errors)
+
+    getRes, err := s.cl.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{})
+    s.NoError(err)
+    s.NotNil(getRes)
+
+    opts := getRes.GetSessionOptions()
+    s.Equal(2, len(opts))
+
+    s.Equal(initialOpts["bardouble"], opts["bardouble"].GetDoubleValue())
+    s.Equal(initialOpts["big_ol_string_list"], opts["big_ol_string_list"].GetStringListValue().GetValues())
+}
"bardouble": 456.0, + "big_ol_string_list": []string{"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"}, + } + + optionVals, err := flight.NewSessionOptionValues(initialOpts) + s.NoError(err) + s.NotNil(optionVals) + + setRes, err := s.cl.SetSessionOptions(ctx, &flight.SetSessionOptionsRequest{SessionOptions: optionVals}) + s.NoError(err) + s.NotNil(setRes) + s.Empty(setRes.Errors) + + closeRes, err := s.cl.CloseSession(ctx, &flight.CloseSessionRequest{}) + s.NoError(err) + s.NotNil(closeRes) + s.Equal(flight.CloseSessionResultClosed, closeRes.GetStatus()) + + getRes, err := s.cl.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{}) + s.NoError(err) + s.NotNil(getRes) + + opts := getRes.GetSessionOptions() + s.Empty(opts) +} + func TestBaseServer(t *testing.T) { suite.Run(t, new(UnimplementedFlightSqlServerSuite)) suite.Run(t, new(FlightSqlServerSuite)) + suite.Run(t, &FlightSqlServerSessionSuite{sessionManager: session.NewStatefulServerSessionManager()}) + suite.Run(t, &FlightSqlServerSessionSuite{sessionManager: session.NewStatelessServerSessionManager()}) +} + +func TestStatefulServerSessionCookies(t *testing.T) { + // Generate session IDs deterministically + sessionIDGenerator := func(ids []string) func() string { + ch := make(chan string, len(ids)) + for _, id := range ids { + ch <- id + } + close(ch) + + return func() string { + return <-ch + } + } + + factory := session.NewSessionFactory(sessionIDGenerator([]string{"how-now-brown-cow", "unique-new-york"})) + store := session.NewSessionStore() + manager := session.NewStatefulServerSessionManager(session.WithFactory(factory), session.WithStore(store)) + middleware := session.NewServerSessionMiddleware(manager) + + srv := flight.NewServerWithMiddleware([]flight.ServerMiddleware{ + flight.CreateServerMiddleware(middleware), + }) + srv.RegisterFlightService(flightsql.NewFlightServer(&testServer{})) + srv.Init("localhost:0") + + go srv.Serve() + defer srv.Shutdown() + + client, err := flightsql.NewClient( + srv.Addr().String(), + nil, + []flight.ClientMiddleware{ + flight.NewClientCookieMiddleware(), + }, + dialOpts..., + ) + require.NoError(t, err) + defer client.Close() + + var ( + trailer metadata.MD + session session.ServerSession + ) + + ctx := context.TODO() + + // Get empty session; should create new session since one doesn't exist + _, err = client.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{}, grpc.Trailer(&trailer)) + require.NoError(t, err) + + // Client should recieve cookie with new session ID + require.Len(t, trailer.Get("set-cookie"), 1) + require.Equal(t, "arrow_flight_session_id=how-now-brown-cow", trailer.Get("set-cookie")[0]) + + // Server should add the empty session to its internal store + session, err = store.Get("how-now-brown-cow") + require.NoError(t, err) + require.NotNil(t, session) + require.Empty(t, session.GetSessionOptions()) + + optionVals, err := flight.NewSessionOptionValues(map[string]any{"hello": "world"}) + require.NoError(t, err) + require.NotNil(t, optionVals) + + // Add option to existing session + _, err = client.SetSessionOptions(ctx, &flight.SetSessionOptionsRequest{SessionOptions: optionVals}, grpc.Trailer(&trailer)) + require.NoError(t, err) + + // Server received and used session from existing client cookie, no need to set a new one + require.Len(t, trailer.Get("set-cookie"), 0) + + // The option we set has been added to the server's state + session, err = store.Get("how-now-brown-cow") + require.NoError(t, err) + require.NotNil(t, session) + require.Len(t, 
+    require.Contains(t, session.GetSessionOptions(), "hello")
+
+    // Close the existing session
+    _, err = client.CloseSession(ctx, &flight.CloseSessionRequest{}, grpc.Trailer(&trailer))
+    require.NoError(t, err)
+
+    // Inform the client that the cookie should be deleted
+    require.Len(t, trailer.Get("set-cookie"), 1)
+    require.Equal(t, "arrow_flight_session_id=how-now-brown-cow; Max-Age=0", trailer.Get("set-cookie")[0])
+
+    // The session has been removed from the server's internal store
+    session, err = store.Get("how-now-brown-cow")
+    require.Error(t, err)
+    require.Nil(t, session)
+
+    // Get the session; this should create a new session because we just closed the previous one
+    _, err = client.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{}, grpc.Trailer(&trailer))
+    require.NoError(t, err)
+
+    // The client is informed to set a NEW cookie for the newly created session
+    require.Len(t, trailer.Get("set-cookie"), 1)
+    require.Equal(t, "arrow_flight_session_id=unique-new-york", trailer.Get("set-cookie")[0])
+
+    // The new empty session has been added to the server's internal store
+    session, err = store.Get("unique-new-york")
+    require.NoError(t, err)
+    require.NotNil(t, session)
+    require.Empty(t, session.GetSessionOptions())
+
+    // Close the new session
+    _, err = client.CloseSession(ctx, &flight.CloseSessionRequest{}, grpc.Trailer(&trailer))
+    require.NoError(t, err)
+
+    // Inform the client that the new session's cookie should be deleted
+    require.Len(t, trailer.Get("set-cookie"), 1)
+    require.Equal(t, "arrow_flight_session_id=unique-new-york; Max-Age=0", trailer.Get("set-cookie")[0])
+
+    // The session has been removed from the server's internal store
+    session, err = store.Get("unique-new-york")
+    require.Error(t, err)
+    require.Nil(t, session)
+}
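With the stateless manager exercised next, the cookie value is the session itself, base64-encoded, so it can be inspected offline. A small runnable sketch that decodes the token asserted below (the payload's message type is internal to the session package, so this stops at the raw protobuf bytes):

package main

import (
	"encoding/base64"
	"fmt"
)

func main() {
	// Token asserted in TestStatelessServerSessionCookies below:
	// the serialized session holding {"hello": "world"}.
	raw, err := base64.StdEncoding.DecodeString("ChAKBWhlbGxvEgcKBXdvcmxk")
	if err != nil {
		panic(err)
	}
	// 18 bytes of protobuf; both the option name and its value are plainly
	// visible, so stateless session state is readable by the client.
	fmt.Printf("%d bytes: % x\n", len(raw), raw)
}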
+ require.Len(t, trailer.Get("set-cookie"), 1) + require.Equal(t, "arrow_flight_session=", trailer.Get("set-cookie")[0]) + + optionVals, err := flight.NewSessionOptionValues(map[string]any{"hello": "world"}) + require.NoError(t, err) + require.NotNil(t, optionVals) + + // Add option to existing session + _, err = client.SetSessionOptions(ctx, &flight.SetSessionOptionsRequest{SessionOptions: optionVals}, grpc.Trailer(&trailer)) + require.NoError(t, err) + + // Session state has been modified, so we send a new cookie with the updated session contents + require.Len(t, trailer.Get("set-cookie"), 1) + require.Equal(t, `arrow_flight_session=ChAKBWhlbGxvEgcKBXdvcmxk`, trailer.Get("set-cookie")[0]) // base64 of binary '{"hello":"world"}' proto message + + // Close the existing session + _, err = client.CloseSession(ctx, &flight.CloseSessionRequest{}, grpc.Trailer(&trailer)) + require.NoError(t, err) + + // Inform the client that the cookie should be deleted + // + // The cookie is in the gRPC trailer because the session may have been closed AFTER the initial headers were sent + require.Len(t, trailer.Get("set-cookie"), 1) + require.Equal(t, "arrow_flight_session=ChAKBWhlbGxvEgcKBXdvcmxk; Max-Age=0", trailer.Get("set-cookie")[0]) + + // Get the session; his should create a new session because we just closed the previous one + // Realistically no session is "created", this just happens because the client was told to drop the cookie + // in the last step. + _, err = client.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{}, grpc.Trailer(&trailer)) + require.NoError(t, err) + + // The client is informed to set a NEW cookie for the newly created empty session + require.Len(t, trailer.Get("set-cookie"), 1) + require.Equal(t, "arrow_flight_session=", trailer.Get("set-cookie")[0]) + + // Close the new session + _, err = client.CloseSession(ctx, &flight.CloseSessionRequest{}, grpc.Trailer(&trailer)) + require.NoError(t, err) + + // Inform the client that the new session's cookie should be deleted + require.Len(t, trailer.Get("set-cookie"), 1) + require.Equal(t, "arrow_flight_session=; Max-Age=0", trailer.Get("set-cookie")[0]) } diff --git a/go/arrow/flight/gen/flight/Flight.pb.go b/go/arrow/flight/gen/flight/Flight.pb.go index 0438bca28be50..42d4493c8f3e6 100644 --- a/go/arrow/flight/gen/flight/Flight.pb.go +++ b/go/arrow/flight/gen/flight/Flight.pb.go @@ -17,8 +17,8 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 -// protoc v4.23.4 +// protoc-gen-go v1.31.0 +// protoc v4.25.2 // source: Flight.proto package flight @@ -38,7 +38,6 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) -// // The result of a cancel operation. // // This is used by CancelFlightInfoResult.status. @@ -103,19 +102,17 @@ func (CancelStatus) EnumDescriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{0} } -// // Describes what type of descriptor is defined. type FlightDescriptor_DescriptorType int32 const ( // Protobuf pattern, not used. FlightDescriptor_UNKNOWN FlightDescriptor_DescriptorType = 0 - // // A named path that identifies a dataset. A path is composed of a string // or list of strings describing a particular dataset. This is conceptually - // similar to a path inside a filesystem. - FlightDescriptor_PATH FlightDescriptor_DescriptorType = 1 // + // similar to a path inside a filesystem. + FlightDescriptor_PATH FlightDescriptor_DescriptorType = 1 // An opaque command to generate a dataset. 
FlightDescriptor_CMD FlightDescriptor_DescriptorType = 2 ) @@ -161,17 +158,132 @@ func (FlightDescriptor_DescriptorType) EnumDescriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{12, 0} } -// +type SetSessionOptionsResult_ErrorValue int32 + +const ( + // Protobuf deserialization fallback value: The status is unknown or unrecognized. + // Servers should avoid using this value. The request may be retried by the client. + SetSessionOptionsResult_UNSPECIFIED SetSessionOptionsResult_ErrorValue = 0 + // The given session option name is invalid. + SetSessionOptionsResult_INVALID_NAME SetSessionOptionsResult_ErrorValue = 1 + // The session option value or type is invalid. + SetSessionOptionsResult_INVALID_VALUE SetSessionOptionsResult_ErrorValue = 2 + // The session option cannot be set. + SetSessionOptionsResult_ERROR SetSessionOptionsResult_ErrorValue = 3 +) + +// Enum value maps for SetSessionOptionsResult_ErrorValue. +var ( + SetSessionOptionsResult_ErrorValue_name = map[int32]string{ + 0: "UNSPECIFIED", + 1: "INVALID_NAME", + 2: "INVALID_VALUE", + 3: "ERROR", + } + SetSessionOptionsResult_ErrorValue_value = map[string]int32{ + "UNSPECIFIED": 0, + "INVALID_NAME": 1, + "INVALID_VALUE": 2, + "ERROR": 3, + } +) + +func (x SetSessionOptionsResult_ErrorValue) Enum() *SetSessionOptionsResult_ErrorValue { + p := new(SetSessionOptionsResult_ErrorValue) + *p = x + return p +} + +func (x SetSessionOptionsResult_ErrorValue) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SetSessionOptionsResult_ErrorValue) Descriptor() protoreflect.EnumDescriptor { + return file_Flight_proto_enumTypes[2].Descriptor() +} + +func (SetSessionOptionsResult_ErrorValue) Type() protoreflect.EnumType { + return &file_Flight_proto_enumTypes[2] +} + +func (x SetSessionOptionsResult_ErrorValue) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SetSessionOptionsResult_ErrorValue.Descriptor instead. +func (SetSessionOptionsResult_ErrorValue) EnumDescriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{22, 0} +} + +type CloseSessionResult_Status int32 + +const ( + // Protobuf deserialization fallback value: The session close status is unknown or + // not recognized. Servers should avoid using this value (send a NOT_FOUND error if + // the requested session is not known or expired). Clients can retry the request. + CloseSessionResult_UNSPECIFIED CloseSessionResult_Status = 0 + // The session close request is complete. Subsequent requests with + // the same session produce a NOT_FOUND error. + CloseSessionResult_CLOSED CloseSessionResult_Status = 1 + // The session close request is in progress. The client may retry + // the close request. + CloseSessionResult_CLOSING CloseSessionResult_Status = 2 + // The session is not closeable. The client should not retry the + // close request. + CloseSessionResult_NOT_CLOSEABLE CloseSessionResult_Status = 3 +) + +// Enum value maps for CloseSessionResult_Status. 
+var ( + CloseSessionResult_Status_name = map[int32]string{ + 0: "UNSPECIFIED", + 1: "CLOSED", + 2: "CLOSING", + 3: "NOT_CLOSEABLE", + } + CloseSessionResult_Status_value = map[string]int32{ + "UNSPECIFIED": 0, + "CLOSED": 1, + "CLOSING": 2, + "NOT_CLOSEABLE": 3, + } +) + +func (x CloseSessionResult_Status) Enum() *CloseSessionResult_Status { + p := new(CloseSessionResult_Status) + *p = x + return p +} + +func (x CloseSessionResult_Status) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CloseSessionResult_Status) Descriptor() protoreflect.EnumDescriptor { + return file_Flight_proto_enumTypes[3].Descriptor() +} + +func (CloseSessionResult_Status) Type() protoreflect.EnumType { + return &file_Flight_proto_enumTypes[3] +} + +func (x CloseSessionResult_Status) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use CloseSessionResult_Status.Descriptor instead. +func (CloseSessionResult_Status) EnumDescriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{26, 0} +} + // The request that a client provides to a server on handshake. type HandshakeRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // A defined protocol version ProtocolVersion uint64 `protobuf:"varint,1,opt,name=protocol_version,json=protocolVersion,proto3" json:"protocol_version,omitempty"` - // // Arbitrary auth/handshake info. Payload []byte `protobuf:"bytes,2,opt,name=payload,proto3" json:"payload,omitempty"` } @@ -227,10 +339,8 @@ type HandshakeResponse struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // A defined protocol version ProtocolVersion uint64 `protobuf:"varint,1,opt,name=protocol_version,json=protocolVersion,proto3" json:"protocol_version,omitempty"` - // // Arbitrary auth/handshake info. Payload []byte `protobuf:"bytes,2,opt,name=payload,proto3" json:"payload,omitempty"` } @@ -281,7 +391,6 @@ func (x *HandshakeResponse) GetPayload() []byte { return nil } -// // A message for doing simple auth. type BasicAuth struct { state protoimpl.MessageState @@ -376,7 +485,6 @@ func (*Empty) Descriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{3} } -// // Describes an available action, including both the name used for execution // along with a short description of the purpose of the action. type ActionType struct { @@ -434,7 +542,6 @@ func (x *ActionType) GetDescription() string { return "" } -// // A service specific expression that can be used to return a limited set // of available Arrow Flight streams. type Criteria struct { @@ -484,7 +591,6 @@ func (x *Criteria) GetExpression() []byte { return nil } -// // An opaque action specific for the service. type Action struct { state protoimpl.MessageState @@ -541,7 +647,6 @@ func (x *Action) GetBody() []byte { return nil } -// // The request of the CancelFlightInfo action. // // The request should be stored in Action.body. @@ -592,7 +697,6 @@ func (x *CancelFlightInfoRequest) GetInfo() *FlightInfo { return nil } -// // The request of the RenewFlightEndpoint action. // // The request should be stored in Action.body. @@ -643,7 +747,6 @@ func (x *RenewFlightEndpointRequest) GetEndpoint() *FlightEndpoint { return nil } -// // An opaque result returned after executing an action. type Result struct { state protoimpl.MessageState @@ -692,7 +795,6 @@ func (x *Result) GetBody() []byte { return nil } -// // The result of the CancelFlightInfo action. 
// // The result should be stored in Result.body. @@ -743,7 +845,6 @@ func (x *CancelFlightInfoResult) GetStatus() CancelStatus { return CancelStatus_CANCEL_STATUS_UNSPECIFIED } -// // Wrap the result of a getSchema call type SchemaResult struct { state protoimpl.MessageState @@ -751,9 +852,10 @@ type SchemaResult struct { unknownFields protoimpl.UnknownFields // The schema of the dataset in its IPC form: - // 4 bytes - an optional IPC_CONTINUATION_TOKEN prefix - // 4 bytes - the byte length of the payload - // a flatbuffer Message whose header is the Schema + // + // 4 bytes - an optional IPC_CONTINUATION_TOKEN prefix + // 4 bytes - the byte length of the payload + // a flatbuffer Message whose header is the Schema Schema []byte `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"` } @@ -796,7 +898,6 @@ func (x *SchemaResult) GetSchema() []byte { return nil } -// // The name or tag for a Flight. May be used as a way to retrieve or generate // a flight or be used to expose a set of previously defined flights. type FlightDescriptor struct { @@ -805,11 +906,9 @@ type FlightDescriptor struct { unknownFields protoimpl.UnknownFields Type FlightDescriptor_DescriptorType `protobuf:"varint,1,opt,name=type,proto3,enum=arrow.flight.protocol.FlightDescriptor_DescriptorType" json:"type,omitempty"` - // // Opaque value used to express a command. Should only be defined when // type = CMD. Cmd []byte `protobuf:"bytes,2,opt,name=cmd,proto3" json:"cmd,omitempty"` - // // List of strings identifying a particular dataset. Should only be defined // when type = PATH. Path []string `protobuf:"bytes,3,rep,name=path,proto3" json:"path,omitempty"` @@ -868,7 +967,6 @@ func (x *FlightDescriptor) GetPath() []string { return nil } -// // The access coordinates for retrieval of a dataset. With a FlightInfo, a // consumer is able to determine how to retrieve a dataset. type FlightInfo struct { @@ -877,14 +975,13 @@ type FlightInfo struct { unknownFields protoimpl.UnknownFields // The schema of the dataset in its IPC form: - // 4 bytes - an optional IPC_CONTINUATION_TOKEN prefix - // 4 bytes - the byte length of the payload - // a flatbuffer Message whose header is the Schema - Schema []byte `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"` // + // 4 bytes - an optional IPC_CONTINUATION_TOKEN prefix + // 4 bytes - the byte length of the payload + // a flatbuffer Message whose header is the Schema + Schema []byte `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"` // The descriptor associated with this info. FlightDescriptor *FlightDescriptor `protobuf:"bytes,2,opt,name=flight_descriptor,json=flightDescriptor,proto3" json:"flight_descriptor,omitempty"` - // // A list of endpoints associated with the flight. To consume the // whole flight, all endpoints (and hence all Tickets) must be // consumed. Endpoints can be consumed in any order. @@ -904,18 +1001,22 @@ type FlightInfo struct { // ordering is important for an application, an application must // choose one of them: // - // * An application requires that all clients must read data in - // returned endpoints order. - // * An application must return the all data in a single endpoint. + // - An application requires that all clients must read data in + // returned endpoints order. + // - An application must return the all data in a single endpoint. Endpoint []*FlightEndpoint `protobuf:"bytes,3,rep,name=endpoint,proto3" json:"endpoint,omitempty"` // Set these to -1 if unknown. 
TotalRecords int64 `protobuf:"varint,4,opt,name=total_records,json=totalRecords,proto3" json:"total_records,omitempty"` TotalBytes int64 `protobuf:"varint,5,opt,name=total_bytes,json=totalBytes,proto3" json:"total_bytes,omitempty"` - // // FlightEndpoints are in the same order as the data. Ordered bool `protobuf:"varint,6,opt,name=ordered,proto3" json:"ordered,omitempty"` - // // Application-defined metadata. + // + // There is no inherent or required relationship between this + // and the app_metadata fields in the FlightEndpoints or resulting + // FlightData messages. Since this metadata is application-defined, + // a given application could define there to be a relationship, + // but there is none required by the spec. AppMetadata []byte `protobuf:"bytes,7,opt,name=app_metadata,json=appMetadata,proto3" json:"app_metadata,omitempty"` } @@ -1000,14 +1101,12 @@ func (x *FlightInfo) GetAppMetadata() []byte { return nil } -// // The information to process a long-running query. type PollInfo struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // The currently available results. // // If "flight_descriptor" is not specified, the query is complete @@ -1025,15 +1124,12 @@ type PollInfo struct { // ticket in the info before the query is // completed. FlightInfo.ordered is also valid. Info *FlightInfo `protobuf:"bytes,1,opt,name=info,proto3" json:"info,omitempty"` - // // The descriptor the client should use on the next try. // If unset, the query is complete. FlightDescriptor *FlightDescriptor `protobuf:"bytes,2,opt,name=flight_descriptor,json=flightDescriptor,proto3" json:"flight_descriptor,omitempty"` - // // Query progress. If known, must be in [0.0, 1.0] but need not be // monotonic or nondecreasing. If unknown, do not set. Progress *float64 `protobuf:"fixed64,3,opt,name=progress,proto3,oneof" json:"progress,omitempty"` - // // Expiration time for this request. After this passes, the server // might not accept the retry descriptor anymore (and the query may // be cancelled). This may be updated on a call to PollFlightInfo. @@ -1100,17 +1196,14 @@ func (x *PollInfo) GetExpirationTime() *timestamppb.Timestamp { return nil } -// // A particular stream or split associated with a flight. type FlightEndpoint struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // Token used to retrieve this stream. Ticket *Ticket `protobuf:"bytes,1,opt,name=ticket,proto3" json:"ticket,omitempty"` - // // A list of URIs where this ticket can be redeemed via DoGet(). // // If the list is empty, the expectation is that the ticket can only @@ -1126,13 +1219,17 @@ type FlightEndpoint struct { // In other words, an application can use multiple locations to // represent redundant and/or load balanced services. Location []*Location `protobuf:"bytes,2,rep,name=location,proto3" json:"location,omitempty"` - // // Expiration time of this stream. If present, clients may assume // they can retry DoGet requests. Otherwise, it is // application-defined whether DoGet requests may be retried. ExpirationTime *timestamppb.Timestamp `protobuf:"bytes,3,opt,name=expiration_time,json=expirationTime,proto3" json:"expiration_time,omitempty"` - // // Application-defined metadata. + // + // There is no inherent or required relationship between this + // and the app_metadata fields in the FlightInfo or resulting + // FlightData messages. 
Since this metadata is application-defined, + // a given application could define there to be a relationship, + // but there is none required by the spec. AppMetadata []byte `protobuf:"bytes,4,opt,name=app_metadata,json=appMetadata,proto3" json:"app_metadata,omitempty"` } @@ -1196,7 +1293,6 @@ func (x *FlightEndpoint) GetAppMetadata() []byte { return nil } -// // A location where a Flight service will accept retrieval of a particular // stream given a ticket. type Location struct { @@ -1246,7 +1342,6 @@ func (x *Location) GetUri() string { return "" } -// // An opaque identifier that the service can use to retrieve a particular // portion of a stream. // @@ -1299,24 +1394,19 @@ func (x *Ticket) GetTicket() []byte { return nil } -// // A batch of Arrow data as part of a stream of batches. type FlightData struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // The descriptor of the data. This is only relevant when a client is // starting a new DoPut stream. FlightDescriptor *FlightDescriptor `protobuf:"bytes,1,opt,name=flight_descriptor,json=flightDescriptor,proto3" json:"flight_descriptor,omitempty"` - // // Header for message data as described in Message.fbs::Message. DataHeader []byte `protobuf:"bytes,2,opt,name=data_header,json=dataHeader,proto3" json:"data_header,omitempty"` - // // Application-defined metadata. AppMetadata []byte `protobuf:"bytes,3,opt,name=app_metadata,json=appMetadata,proto3" json:"app_metadata,omitempty"` - // // The actual batch of Arrow data. Preferably handled with minimal-copies // coming last in the definition to help with sidecar patterns (it is // expected that some implementations will fetch this field off the wire @@ -1384,7 +1474,7 @@ func (x *FlightData) GetDataBody() []byte { return nil } -//* +// * // The response message associated with the submission of a DoPut. type PutResult struct { state protoimpl.MessageState @@ -1433,223 +1523,826 @@ func (x *PutResult) GetAppMetadata() []byte { return nil } -var File_Flight_proto protoreflect.FileDescriptor +// EXPERIMENTAL: Union of possible value types for a Session Option to be set to. +// +// By convention, an attempt to set a valueless SessionOptionValue should +// attempt to unset or clear the named option value on the server. 
+type SessionOptionValue struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields -var file_Flight_proto_rawDesc = []byte{ - 0x0a, 0x0c, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x15, - 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x57, 0x0a, 0x10, 0x48, 0x61, 0x6e, 0x64, 0x73, 0x68, - 0x61, 0x6b, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x29, 0x0a, 0x10, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x04, 0x52, 0x0f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x56, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x22, - 0x58, 0x0a, 0x11, 0x48, 0x61, 0x6e, 0x64, 0x73, 0x68, 0x61, 0x6b, 0x65, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x29, 0x0a, 0x10, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, - 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, - 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, - 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x22, 0x43, 0x0a, 0x09, 0x42, 0x61, 0x73, - 0x69, 0x63, 0x41, 0x75, 0x74, 0x68, 0x12, 0x1a, 0x0a, 0x08, 0x75, 0x73, 0x65, 0x72, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x75, 0x73, 0x65, 0x72, 0x6e, 0x61, - 0x6d, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64, 0x22, 0x07, - 0x0a, 0x05, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x22, 0x42, 0x0a, 0x0a, 0x41, 0x63, 0x74, 0x69, 0x6f, - 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, - 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, - 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x2a, 0x0a, 0x08, 0x43, - 0x72, 0x69, 0x74, 0x65, 0x72, 0x69, 0x61, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x78, 0x70, 0x72, 0x65, - 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x65, 0x78, 0x70, - 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x30, 0x0a, 0x06, 0x41, 0x63, 0x74, 0x69, 0x6f, - 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0c, 0x52, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x22, 0x50, 0x0a, 0x17, 0x43, 0x61, 0x6e, - 0x63, 0x65, 0x6c, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x35, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, - 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 
0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, - 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x22, 0x5f, 0x0a, 0x1a, 0x52, - 0x65, 0x6e, 0x65, 0x77, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, - 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x41, 0x0a, 0x08, 0x65, 0x6e, 0x64, - 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x61, 0x72, - 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, - 0x6e, 0x74, 0x52, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x22, 0x1c, 0x0a, 0x06, - 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x22, 0x55, 0x0a, 0x16, 0x43, 0x61, - 0x6e, 0x63, 0x65, 0x6c, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x12, 0x3b, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, - 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x43, 0x61, 0x6e, - 0x63, 0x65, 0x6c, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x22, 0x26, 0x0a, 0x0c, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0c, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x22, 0xb6, 0x01, 0x0a, 0x10, 0x46, 0x6c, - 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x12, 0x4a, - 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x36, 0x2e, 0x61, - 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, - 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x63, 0x6d, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x03, 0x63, 0x6d, 0x64, 0x12, 0x12, 0x0a, 0x04, - 0x70, 0x61, 0x74, 0x68, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, - 0x22, 0x30, 0x0a, 0x0e, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x54, 0x79, - 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, - 0x08, 0x0a, 0x04, 0x50, 0x41, 0x54, 0x48, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x43, 0x4d, 0x44, - 0x10, 0x02, 0x22, 0xc0, 0x02, 0x0a, 0x0a, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, - 0x6f, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0c, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x54, 0x0a, 0x11, 0x66, 0x6c, 0x69, - 0x67, 0x68, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, - 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, - 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x52, 0x10, 0x66, - 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 
0x70, 0x74, 0x6f, 0x72, 0x12, - 0x41, 0x0a, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x25, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, - 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x52, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, - 0x6e, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x72, 0x65, 0x63, 0x6f, - 0x72, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, - 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, - 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x74, 0x6f, - 0x74, 0x61, 0x6c, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x72, 0x64, 0x65, - 0x72, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x6f, 0x72, 0x64, 0x65, 0x72, - 0x65, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x70, 0x70, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x61, 0x70, 0x70, 0x4d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x8a, 0x02, 0x0a, 0x08, 0x50, 0x6f, 0x6c, 0x6c, 0x49, 0x6e, - 0x66, 0x6f, 0x12, 0x35, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, - 0x6e, 0x66, 0x6f, 0x52, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x12, 0x54, 0x0a, 0x11, 0x66, 0x6c, 0x69, - 0x67, 0x68, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, - 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, - 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x52, 0x10, 0x66, - 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x12, - 0x1f, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x01, 0x48, 0x00, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x88, 0x01, 0x01, - 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, 0x70, 0x69, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, - 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, - 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, - 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0e, 0x65, 0x78, 0x70, 0x69, 0x72, 0x61, 0x74, 0x69, 0x6f, - 0x6e, 0x54, 0x69, 0x6d, 0x65, 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, - 0x73, 0x73, 0x22, 0xec, 0x01, 0x0a, 0x0e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x45, 0x6e, 0x64, - 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x12, 0x35, 0x0a, 0x06, 0x74, 0x69, 0x63, 0x6b, 0x65, 0x74, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, - 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x54, 0x69, - 0x63, 0x6b, 0x65, 0x74, 0x52, 0x06, 0x74, 0x69, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x3b, 0x0a, 0x08, - 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, - 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 
0x72, - 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, - 0x08, 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, 0x70, - 0x69, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0e, - 0x65, 0x78, 0x70, 0x69, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x21, - 0x0a, 0x0c, 0x61, 0x70, 0x70, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x61, 0x70, 0x70, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x22, 0x1c, 0x0a, 0x08, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x10, 0x0a, - 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, - 0x20, 0x0a, 0x06, 0x54, 0x69, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x69, 0x63, - 0x6b, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x74, 0x69, 0x63, 0x6b, 0x65, - 0x74, 0x22, 0xc4, 0x01, 0x0a, 0x0a, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x61, 0x74, 0x61, - 0x12, 0x54, 0x0a, 0x11, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x72, - 0x69, 0x70, 0x74, 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x61, 0x72, - 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, - 0x70, 0x74, 0x6f, 0x72, 0x52, 0x10, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, - 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x68, - 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x64, 0x61, 0x74, - 0x61, 0x48, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x70, 0x70, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x61, - 0x70, 0x70, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x1c, 0x0a, 0x09, 0x64, 0x61, - 0x74, 0x61, 0x5f, 0x62, 0x6f, 0x64, 0x79, 0x18, 0xe8, 0x07, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x08, - 0x64, 0x61, 0x74, 0x61, 0x42, 0x6f, 0x64, 0x79, 0x22, 0x2e, 0x0a, 0x09, 0x50, 0x75, 0x74, 0x52, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x70, 0x70, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x61, 0x70, 0x70, - 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2a, 0x8b, 0x01, 0x0a, 0x0c, 0x43, 0x61, 0x6e, - 0x63, 0x65, 0x6c, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1d, 0x0a, 0x19, 0x43, 0x41, 0x4e, - 0x43, 0x45, 0x4c, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, - 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1b, 0x0a, 0x17, 0x43, 0x41, 0x4e, 0x43, - 0x45, 0x4c, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, - 0x4c, 0x45, 0x44, 0x10, 0x01, 0x12, 0x1c, 0x0a, 0x18, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x5f, - 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x4c, 0x49, 0x4e, - 0x47, 0x10, 0x02, 0x12, 0x21, 0x0a, 0x1d, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x5f, 0x53, 0x54, - 0x41, 0x54, 0x55, 0x53, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x4c, - 0x41, 0x42, 0x4c, 
0x45, 0x10, 0x03, 0x32, 0x85, 0x07, 0x0a, 0x0d, 0x46, 0x6c, 0x69, 0x67, 0x68, - 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x64, 0x0a, 0x09, 0x48, 0x61, 0x6e, 0x64, - 0x73, 0x68, 0x61, 0x6b, 0x65, 0x12, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, - 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x48, 0x61, - 0x6e, 0x64, 0x73, 0x68, 0x61, 0x6b, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x28, - 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x48, 0x61, 0x6e, 0x64, 0x73, 0x68, 0x61, 0x6b, 0x65, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x28, 0x01, 0x30, 0x01, 0x12, 0x55, - 0x0a, 0x0b, 0x4c, 0x69, 0x73, 0x74, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x73, 0x12, 0x1f, 0x2e, - 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x43, 0x72, 0x69, 0x74, 0x65, 0x72, 0x69, 0x61, 0x1a, 0x21, - 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, - 0x6f, 0x22, 0x00, 0x30, 0x01, 0x12, 0x5d, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x46, 0x6c, 0x69, 0x67, - 0x68, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, - 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, - 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x1a, - 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, - 0x66, 0x6f, 0x22, 0x00, 0x12, 0x5c, 0x0a, 0x0e, 0x50, 0x6f, 0x6c, 0x6c, 0x46, 0x6c, 0x69, 0x67, - 0x68, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, - 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, - 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x1a, - 0x1f, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x50, 0x6f, 0x6c, 0x6c, 0x49, 0x6e, 0x66, 0x6f, - 0x22, 0x00, 0x12, 0x5b, 0x0a, 0x09, 0x47, 0x65, 0x74, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, - 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, - 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x1a, 0x23, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, - 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, - 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, - 0x4d, 0x0a, 0x05, 0x44, 0x6f, 0x47, 0x65, 0x74, 0x12, 0x1d, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, - 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, - 0x2e, 0x54, 0x69, 0x63, 0x6b, 0x65, 0x74, 0x1a, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, - 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, - 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x61, 0x74, 0x61, 0x22, 0x00, 0x30, 0x01, 0x12, 0x52, - 0x0a, 0x05, 0x44, 0x6f, 0x50, 0x75, 0x74, 
0x12, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, - 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, - 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x61, 0x74, 0x61, 0x1a, 0x20, 0x2e, 0x61, 0x72, 0x72, - 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, - 0x6f, 0x6c, 0x2e, 0x50, 0x75, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x28, 0x01, - 0x30, 0x01, 0x12, 0x58, 0x0a, 0x0a, 0x44, 0x6f, 0x45, 0x78, 0x63, 0x68, 0x61, 0x6e, 0x67, 0x65, - 0x12, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, - 0x61, 0x74, 0x61, 0x1a, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, - 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, - 0x68, 0x74, 0x44, 0x61, 0x74, 0x61, 0x22, 0x00, 0x28, 0x01, 0x30, 0x01, 0x12, 0x4c, 0x0a, 0x08, - 0x44, 0x6f, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1d, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, - 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, - 0x2e, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0x1d, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, - 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, - 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x30, 0x01, 0x12, 0x52, 0x0a, 0x0b, 0x4c, 0x69, - 0x73, 0x74, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x1c, 0x2e, 0x61, 0x72, 0x72, 0x6f, - 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, - 0x6c, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, - 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, - 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x22, 0x00, 0x30, 0x01, 0x42, 0x71, - 0x0a, 0x1c, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x61, 0x72, 0x72, - 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x69, 0x6d, 0x70, 0x6c, 0x5a, 0x32, - 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x70, 0x61, 0x63, 0x68, - 0x65, 0x2f, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2f, 0x67, 0x6f, 0x2f, 0x61, 0x72, 0x72, 0x6f, 0x77, - 0x2f, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x66, 0x6c, 0x69, 0x67, - 0x68, 0x74, 0xaa, 0x02, 0x1c, 0x41, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x41, 0x72, 0x72, 0x6f, - 0x77, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, - 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + // Types that are assignable to OptionValue: + // + // *SessionOptionValue_StringValue + // *SessionOptionValue_BoolValue + // *SessionOptionValue_Int64Value + // *SessionOptionValue_DoubleValue + // *SessionOptionValue_StringListValue_ + OptionValue isSessionOptionValue_OptionValue `protobuf_oneof:"option_value"` } -var ( - file_Flight_proto_rawDescOnce sync.Once - file_Flight_proto_rawDescData = file_Flight_proto_rawDesc -) +func (x *SessionOptionValue) Reset() { + *x = SessionOptionValue{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[20] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SessionOptionValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SessionOptionValue) ProtoMessage() 
{} + +func (x *SessionOptionValue) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[20] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SessionOptionValue.ProtoReflect.Descriptor instead. +func (*SessionOptionValue) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{20} +} + +func (m *SessionOptionValue) GetOptionValue() isSessionOptionValue_OptionValue { + if m != nil { + return m.OptionValue + } + return nil +} + +func (x *SessionOptionValue) GetStringValue() string { + if x, ok := x.GetOptionValue().(*SessionOptionValue_StringValue); ok { + return x.StringValue + } + return "" +} + +func (x *SessionOptionValue) GetBoolValue() bool { + if x, ok := x.GetOptionValue().(*SessionOptionValue_BoolValue); ok { + return x.BoolValue + } + return false +} + +func (x *SessionOptionValue) GetInt64Value() int64 { + if x, ok := x.GetOptionValue().(*SessionOptionValue_Int64Value); ok { + return x.Int64Value + } + return 0 +} + +func (x *SessionOptionValue) GetDoubleValue() float64 { + if x, ok := x.GetOptionValue().(*SessionOptionValue_DoubleValue); ok { + return x.DoubleValue + } + return 0 +} + +func (x *SessionOptionValue) GetStringListValue() *SessionOptionValue_StringListValue { + if x, ok := x.GetOptionValue().(*SessionOptionValue_StringListValue_); ok { + return x.StringListValue + } + return nil +} + +type isSessionOptionValue_OptionValue interface { + isSessionOptionValue_OptionValue() +} + +type SessionOptionValue_StringValue struct { + StringValue string `protobuf:"bytes,1,opt,name=string_value,json=stringValue,proto3,oneof"` +} + +type SessionOptionValue_BoolValue struct { + BoolValue bool `protobuf:"varint,2,opt,name=bool_value,json=boolValue,proto3,oneof"` +} + +type SessionOptionValue_Int64Value struct { + Int64Value int64 `protobuf:"fixed64,3,opt,name=int64_value,json=int64Value,proto3,oneof"` +} + +type SessionOptionValue_DoubleValue struct { + DoubleValue float64 `protobuf:"fixed64,4,opt,name=double_value,json=doubleValue,proto3,oneof"` +} + +type SessionOptionValue_StringListValue_ struct { + StringListValue *SessionOptionValue_StringListValue `protobuf:"bytes,5,opt,name=string_list_value,json=stringListValue,proto3,oneof"` +} + +func (*SessionOptionValue_StringValue) isSessionOptionValue_OptionValue() {} + +func (*SessionOptionValue_BoolValue) isSessionOptionValue_OptionValue() {} + +func (*SessionOptionValue_Int64Value) isSessionOptionValue_OptionValue() {} + +func (*SessionOptionValue_DoubleValue) isSessionOptionValue_OptionValue() {} + +func (*SessionOptionValue_StringListValue_) isSessionOptionValue_OptionValue() {} + +// EXPERIMENTAL: A request to set session options for an existing or new (implicit) +// server session. +// +// Sessions are persisted and referenced via a transport-level state management, typically +// RFC 6265 HTTP cookies when using an HTTP transport. The suggested cookie name or state +// context key is 'arrow_flight_session_id', although implementations may freely choose their +// own name. +// +// Session creation (if one does not already exist) is implied by this RPC request, however +// server implementations may choose to initiate a session that also contains client-provided +// session options at any other time, e.g. 
on authentication, or when any other call is made +// and the server wishes to use a session to persist any state (or lack thereof). +type SetSessionOptionsRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + SessionOptions map[string]*SessionOptionValue `protobuf:"bytes,1,rep,name=session_options,json=sessionOptions,proto3" json:"session_options,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` +} + +func (x *SetSessionOptionsRequest) Reset() { + *x = SetSessionOptionsRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[21] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SetSessionOptionsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SetSessionOptionsRequest) ProtoMessage() {} + +func (x *SetSessionOptionsRequest) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[21] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SetSessionOptionsRequest.ProtoReflect.Descriptor instead. +func (*SetSessionOptionsRequest) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{21} +} + +func (x *SetSessionOptionsRequest) GetSessionOptions() map[string]*SessionOptionValue { + if x != nil { + return x.SessionOptions + } + return nil +} + +// EXPERIMENTAL: The results (individually) of setting a set of session options. +// +// Option names should only be present in the response if they were not successfully +// set on the server; that is, a response without an Error for a name provided in the +// SetSessionOptionsRequest implies that the named option value was set successfully. +type SetSessionOptionsResult struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Errors map[string]*SetSessionOptionsResult_Error `protobuf:"bytes,1,rep,name=errors,proto3" json:"errors,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` +} + +func (x *SetSessionOptionsResult) Reset() { + *x = SetSessionOptionsResult{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[22] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SetSessionOptionsResult) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SetSessionOptionsResult) ProtoMessage() {} + +func (x *SetSessionOptionsResult) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[22] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SetSessionOptionsResult.ProtoReflect.Descriptor instead. +func (*SetSessionOptionsResult) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{22} +} + +func (x *SetSessionOptionsResult) GetErrors() map[string]*SetSessionOptionsResult_Error { + if x != nil { + return x.Errors + } + return nil +} + +// EXPERIMENTAL: A request to access the session options for the current server session. 
+// +// The existing session is referenced via a cookie header or similar (see +// SetSessionOptionsRequest above); it is an error to make this request with a missing, +// invalid, or expired session cookie header or other implementation-defined session +// reference token. +type GetSessionOptionsRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *GetSessionOptionsRequest) Reset() { + *x = GetSessionOptionsRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[23] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetSessionOptionsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetSessionOptionsRequest) ProtoMessage() {} + +func (x *GetSessionOptionsRequest) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[23] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetSessionOptionsRequest.ProtoReflect.Descriptor instead. +func (*GetSessionOptionsRequest) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{23} +} + +// EXPERIMENTAL: The result containing the current server session options. +type GetSessionOptionsResult struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + SessionOptions map[string]*SessionOptionValue `protobuf:"bytes,1,rep,name=session_options,json=sessionOptions,proto3" json:"session_options,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` +} + +func (x *GetSessionOptionsResult) Reset() { + *x = GetSessionOptionsResult{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[24] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetSessionOptionsResult) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetSessionOptionsResult) ProtoMessage() {} + +func (x *GetSessionOptionsResult) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[24] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetSessionOptionsResult.ProtoReflect.Descriptor instead. +func (*GetSessionOptionsResult) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{24} +} + +func (x *GetSessionOptionsResult) GetSessionOptions() map[string]*SessionOptionValue { + if x != nil { + return x.SessionOptions + } + return nil +} + +// Request message for the "Close Session" action. +// +// The exiting session is referenced via a cookie header. 
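For orientation, the session-option messages added above are ordinary generated protobuf types, so their mechanics are easiest to see in a small client-side sketch. The snippet below is illustrative only: the option names ("catalog", "max_rows") are invented, the transport wrapping (how the payload actually reaches the server) is outside this diff, and the import path simply follows the go_package option embedded in the descriptor. It uses only identifiers defined in this file plus the standard protobuf runtime.

package main

import (
	"fmt"

	flight "github.com/apache/arrow/go/arrow/flight/gen/flight"
	"google.golang.org/protobuf/proto"
)

func main() {
	// Build a SetSessionOptionsRequest; each value is one arm of the
	// SessionOptionValue oneof generated above. The option names here
	// are hypothetical, chosen purely for illustration.
	req := &flight.SetSessionOptionsRequest{
		SessionOptions: map[string]*flight.SessionOptionValue{
			"catalog":  {OptionValue: &flight.SessionOptionValue_StringValue{StringValue: "main"}},
			"max_rows": {OptionValue: &flight.SessionOptionValue_Int64Value{Int64Value: 100000}},
		},
	}

	// The request travels as an opaque byte payload; proto.Marshal
	// produces the bytes a client would send.
	payload, err := proto.Marshal(req)
	if err != nil {
		panic(err)
	}
	fmt.Printf("SetSessionOptionsRequest payload: %d bytes\n", len(payload))

	// Reading a oneof back is a type switch over OptionValue.
	switch v := req.SessionOptions["max_rows"].OptionValue.(type) {
	case *flight.SessionOptionValue_Int64Value:
		fmt.Println("max_rows =", v.Int64Value)
	case *flight.SessionOptionValue_StringListValue_:
		fmt.Println("max_rows is a list:", v.StringListValue.GetValues())
	}

	// Per the comment on SetSessionOptionsResult, Errors lists only the
	// options that failed; an empty map means everything was accepted.
	res := &flight.SetSessionOptionsResult{}
	if e, failed := res.GetErrors()["max_rows"]; failed {
		fmt.Println("max_rows rejected:", e.GetValue())
	}
}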
+type CloseSessionRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *CloseSessionRequest) Reset() { + *x = CloseSessionRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[25] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CloseSessionRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CloseSessionRequest) ProtoMessage() {} + +func (x *CloseSessionRequest) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[25] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CloseSessionRequest.ProtoReflect.Descriptor instead. +func (*CloseSessionRequest) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{25} +} + +// The result of closing a session. +type CloseSessionResult struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status CloseSessionResult_Status `protobuf:"varint,1,opt,name=status,proto3,enum=arrow.flight.protocol.CloseSessionResult_Status" json:"status,omitempty"` +} + +func (x *CloseSessionResult) Reset() { + *x = CloseSessionResult{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[26] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CloseSessionResult) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CloseSessionResult) ProtoMessage() {} + +func (x *CloseSessionResult) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[26] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CloseSessionResult.ProtoReflect.Descriptor instead. +func (*CloseSessionResult) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{26} +} + +func (x *CloseSessionResult) GetStatus() CloseSessionResult_Status { + if x != nil { + return x.Status + } + return CloseSessionResult_UNSPECIFIED +} + +type SessionOptionValue_StringListValue struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Values []string `protobuf:"bytes,1,rep,name=values,proto3" json:"values,omitempty"` +} + +func (x *SessionOptionValue_StringListValue) Reset() { + *x = SessionOptionValue_StringListValue{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[27] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SessionOptionValue_StringListValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SessionOptionValue_StringListValue) ProtoMessage() {} + +func (x *SessionOptionValue_StringListValue) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[27] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SessionOptionValue_StringListValue.ProtoReflect.Descriptor instead. 
+func (*SessionOptionValue_StringListValue) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{20, 0} +} + +func (x *SessionOptionValue_StringListValue) GetValues() []string { + if x != nil { + return x.Values + } + return nil +} + +type SetSessionOptionsResult_Error struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Value SetSessionOptionsResult_ErrorValue `protobuf:"varint,1,opt,name=value,proto3,enum=arrow.flight.protocol.SetSessionOptionsResult_ErrorValue" json:"value,omitempty"` +} + +func (x *SetSessionOptionsResult_Error) Reset() { + *x = SetSessionOptionsResult_Error{} + if protoimpl.UnsafeEnabled { + mi := &file_Flight_proto_msgTypes[29] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SetSessionOptionsResult_Error) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SetSessionOptionsResult_Error) ProtoMessage() {} + +func (x *SetSessionOptionsResult_Error) ProtoReflect() protoreflect.Message { + mi := &file_Flight_proto_msgTypes[29] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SetSessionOptionsResult_Error.ProtoReflect.Descriptor instead. +func (*SetSessionOptionsResult_Error) Descriptor() ([]byte, []int) { + return file_Flight_proto_rawDescGZIP(), []int{22, 0} +} + +func (x *SetSessionOptionsResult_Error) GetValue() SetSessionOptionsResult_ErrorValue { + if x != nil { + return x.Value + } + return SetSessionOptionsResult_UNSPECIFIED +} + +var File_Flight_proto protoreflect.FileDescriptor + +var file_Flight_proto_rawDesc = []byte{ + 0x0a, 0x0c, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x15, + 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x57, 0x0a, 0x10, 0x48, 0x61, 0x6e, 0x64, 0x73, 0x68, + 0x61, 0x6b, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x29, 0x0a, 0x10, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x0f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x56, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x22, + 0x58, 0x0a, 0x11, 0x48, 0x61, 0x6e, 0x64, 0x73, 0x68, 0x61, 0x6b, 0x65, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x29, 0x0a, 0x10, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, + 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0f, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, + 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, + 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x22, 0x43, 0x0a, 0x09, 0x42, 0x61, 0x73, + 0x69, 0x63, 0x41, 0x75, 0x74, 0x68, 0x12, 0x1a, 0x0a, 0x08, 0x75, 0x73, 0x65, 0x72, 0x6e, 0x61, + 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x75, 
0x73, 0x65, 0x72, 0x6e, 0x61, + 0x6d, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64, 0x22, 0x07, + 0x0a, 0x05, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x22, 0x42, 0x0a, 0x0a, 0x41, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, + 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, + 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x2a, 0x0a, 0x08, 0x43, + 0x72, 0x69, 0x74, 0x65, 0x72, 0x69, 0x61, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x78, 0x70, 0x72, 0x65, + 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x65, 0x78, 0x70, + 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x30, 0x0a, 0x06, 0x41, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0c, 0x52, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x22, 0x50, 0x0a, 0x17, 0x43, 0x61, 0x6e, + 0x63, 0x65, 0x6c, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x35, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, + 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, + 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x22, 0x5f, 0x0a, 0x1a, 0x52, + 0x65, 0x6e, 0x65, 0x77, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, + 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x41, 0x0a, 0x08, 0x65, 0x6e, 0x64, + 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x61, 0x72, + 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, + 0x6e, 0x74, 0x52, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x22, 0x1c, 0x0a, 0x06, + 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x22, 0x55, 0x0a, 0x16, 0x43, 0x61, + 0x6e, 0x63, 0x65, 0x6c, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x12, 0x3b, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x43, 0x61, 0x6e, + 0x63, 0x65, 0x6c, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x22, 0x26, 0x0a, 0x0c, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0c, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x22, 0xb6, 0x01, 0x0a, 0x10, 0x46, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x12, 0x4a, + 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x36, 0x2e, 
0x61, + 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, + 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, + 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x63, 0x6d, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x03, 0x63, 0x6d, 0x64, 0x12, 0x12, 0x0a, 0x04, + 0x70, 0x61, 0x74, 0x68, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, + 0x22, 0x30, 0x0a, 0x0e, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x54, 0x79, + 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, + 0x08, 0x0a, 0x04, 0x50, 0x41, 0x54, 0x48, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x43, 0x4d, 0x44, + 0x10, 0x02, 0x22, 0xc0, 0x02, 0x0a, 0x0a, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, + 0x6f, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0c, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x54, 0x0a, 0x11, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x52, 0x10, 0x66, + 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x12, + 0x41, 0x0a, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x25, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, + 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x52, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, + 0x6e, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x72, 0x65, 0x63, 0x6f, + 0x72, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x72, 0x64, 0x65, + 0x72, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x6f, 0x72, 0x64, 0x65, 0x72, + 0x65, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x70, 0x70, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x61, 0x70, 0x70, 0x4d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x8a, 0x02, 0x0a, 0x08, 0x50, 0x6f, 0x6c, 0x6c, 0x49, 0x6e, + 0x66, 0x6f, 0x12, 0x35, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, + 0x6e, 0x66, 0x6f, 0x52, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x12, 0x54, 0x0a, 0x11, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 
0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x52, 0x10, 0x66, + 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x12, + 0x1f, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x01, 0x48, 0x00, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x88, 0x01, 0x01, + 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, 0x70, 0x69, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, + 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, + 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0e, 0x65, 0x78, 0x70, 0x69, 0x72, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x54, 0x69, 0x6d, 0x65, 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, + 0x73, 0x73, 0x22, 0xec, 0x01, 0x0a, 0x0e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x45, 0x6e, 0x64, + 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x12, 0x35, 0x0a, 0x06, 0x74, 0x69, 0x63, 0x6b, 0x65, 0x74, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x54, 0x69, + 0x63, 0x6b, 0x65, 0x74, 0x52, 0x06, 0x74, 0x69, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x3b, 0x0a, 0x08, + 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, + 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, + 0x08, 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, 0x70, + 0x69, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0e, + 0x65, 0x78, 0x70, 0x69, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x21, + 0x0a, 0x0c, 0x61, 0x70, 0x70, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x61, 0x70, 0x70, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x22, 0x1c, 0x0a, 0x08, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x10, 0x0a, + 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, + 0x20, 0x0a, 0x06, 0x54, 0x69, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x69, 0x63, + 0x6b, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x74, 0x69, 0x63, 0x6b, 0x65, + 0x74, 0x22, 0xc4, 0x01, 0x0a, 0x0a, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x61, 0x74, 0x61, + 0x12, 0x54, 0x0a, 0x11, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x72, + 0x69, 0x70, 0x74, 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x61, 0x72, + 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x6f, 0x72, 0x52, 0x10, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, + 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x68, + 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x02, 
0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x64, 0x61, 0x74, + 0x61, 0x48, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x70, 0x70, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x61, + 0x70, 0x70, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x1c, 0x0a, 0x09, 0x64, 0x61, + 0x74, 0x61, 0x5f, 0x62, 0x6f, 0x64, 0x79, 0x18, 0xe8, 0x07, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x08, + 0x64, 0x61, 0x74, 0x61, 0x42, 0x6f, 0x64, 0x79, 0x22, 0x2e, 0x0a, 0x09, 0x50, 0x75, 0x74, 0x52, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x70, 0x70, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x61, 0x70, 0x70, + 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0xc6, 0x02, 0x0a, 0x12, 0x53, 0x65, 0x73, + 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, + 0x23, 0x0a, 0x0c, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x0b, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1f, 0x0a, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, 0x5f, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x48, 0x00, 0x52, 0x09, 0x62, 0x6f, 0x6f, 0x6c, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x21, 0x0a, 0x0b, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x10, 0x48, 0x00, 0x52, 0x0a, 0x69, 0x6e, + 0x74, 0x36, 0x34, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x64, 0x6f, 0x75, 0x62, + 0x6c, 0x65, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x01, 0x48, 0x00, + 0x52, 0x0b, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x67, 0x0a, + 0x11, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x6c, 0x69, 0x73, 0x74, 0x5f, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x39, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, + 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, + 0x2e, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x2e, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x4c, 0x69, 0x73, 0x74, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x48, 0x00, 0x52, 0x0f, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x4c, 0x69, 0x73, + 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x1a, 0x29, 0x0a, 0x0f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, + 0x4c, 0x69, 0x73, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x73, 0x42, 0x0e, 0x0a, 0x0c, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x22, 0xf6, 0x01, 0x0a, 0x18, 0x53, 0x65, 0x74, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x6c, + 0x0a, 0x0f, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x43, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, + 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, + 0x53, 0x65, 0x74, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, 0x74, 0x72, 
0x79, 0x52, 0x0e, 0x73, 0x65, + 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x6c, 0x0a, 0x13, + 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x3f, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x53, 0x65, 0x73, + 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, + 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x87, 0x03, 0x0a, 0x17, 0x53, + 0x65, 0x74, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x52, 0x0a, 0x06, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, + 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3a, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, + 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x53, + 0x65, 0x74, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x52, 0x06, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x1a, 0x58, 0x0a, 0x05, 0x45, 0x72, + 0x72, 0x6f, 0x72, 0x12, 0x4f, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0e, 0x32, 0x39, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, + 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x53, 0x65, 0x74, 0x53, 0x65, + 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x1a, 0x6f, 0x0a, 0x0b, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x4a, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x53, 0x65, 0x74, + 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x4d, 0x0a, 0x0a, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x12, 0x0f, 0x0a, 0x0b, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, + 0x45, 0x44, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x5f, + 0x4e, 0x41, 0x4d, 0x45, 0x10, 0x01, 0x12, 0x11, 0x0a, 0x0d, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, + 0x44, 0x5f, 0x56, 0x41, 0x4c, 0x55, 0x45, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x45, 0x52, 0x52, + 0x4f, 0x52, 0x10, 0x03, 0x22, 0x1a, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x53, 0x65, 0x73, 0x73, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x22, 0xf4, 0x01, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x6b, 0x0a, 
0x0f, + 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x42, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x47, 0x65, + 0x74, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0e, 0x73, 0x65, 0x73, 0x73, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x6c, 0x0a, 0x13, 0x53, 0x65, 0x73, + 0x73, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, + 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, + 0x65, 0x79, 0x12, 0x3f, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x29, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, + 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x15, 0x0a, 0x13, 0x43, 0x6c, 0x6f, 0x73, 0x65, + 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0xa5, + 0x01, 0x0a, 0x12, 0x43, 0x6c, 0x6f, 0x73, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x52, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x48, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x30, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x43, 0x6c, + 0x6f, 0x73, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, + 0x45, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0f, 0x0a, 0x0b, 0x55, 0x4e, 0x53, + 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x43, 0x4c, + 0x4f, 0x53, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x4c, 0x4f, 0x53, 0x49, 0x4e, + 0x47, 0x10, 0x02, 0x12, 0x11, 0x0a, 0x0d, 0x4e, 0x4f, 0x54, 0x5f, 0x43, 0x4c, 0x4f, 0x53, 0x45, + 0x41, 0x42, 0x4c, 0x45, 0x10, 0x03, 0x2a, 0x8b, 0x01, 0x0a, 0x0c, 0x43, 0x61, 0x6e, 0x63, 0x65, + 0x6c, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1d, 0x0a, 0x19, 0x43, 0x41, 0x4e, 0x43, 0x45, + 0x4c, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, + 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1b, 0x0a, 0x17, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, + 0x5f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x4c, 0x45, + 0x44, 0x10, 0x01, 0x12, 0x1c, 0x0a, 0x18, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x5f, 0x53, 0x54, + 0x41, 0x54, 0x55, 0x53, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x4c, 0x49, 0x4e, 0x47, 0x10, + 0x02, 0x12, 0x21, 0x0a, 0x1d, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x5f, 0x53, 0x54, 0x41, 0x54, + 0x55, 0x53, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x4c, 0x41, 0x42, + 0x4c, 0x45, 0x10, 0x03, 0x32, 0x85, 0x07, 0x0a, 0x0d, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x53, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x64, 0x0a, 0x09, 0x48, 0x61, 0x6e, 0x64, 0x73, 0x68, + 0x61, 0x6b, 0x65, 
0x12, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, + 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x48, 0x61, 0x6e, 0x64, + 0x73, 0x68, 0x61, 0x6b, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x28, 0x2e, 0x61, + 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x48, 0x61, 0x6e, 0x64, 0x73, 0x68, 0x61, 0x6b, 0x65, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x28, 0x01, 0x30, 0x01, 0x12, 0x55, 0x0a, 0x0b, + 0x4c, 0x69, 0x73, 0x74, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x61, 0x72, + 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x63, 0x6f, 0x6c, 0x2e, 0x43, 0x72, 0x69, 0x74, 0x65, 0x72, 0x69, 0x61, 0x1a, 0x21, 0x2e, 0x61, + 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x22, + 0x00, 0x30, 0x01, 0x12, 0x5d, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, + 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x1a, 0x21, 0x2e, + 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x66, 0x6f, + 0x22, 0x00, 0x12, 0x5c, 0x0a, 0x0e, 0x50, 0x6f, 0x6c, 0x6c, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, + 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x27, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, + 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x1a, 0x1f, 0x2e, + 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x50, 0x6f, 0x6c, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x22, 0x00, + 0x12, 0x5b, 0x0a, 0x09, 0x47, 0x65, 0x74, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x27, 0x2e, + 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x65, 0x73, 0x63, + 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x1a, 0x23, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, + 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x53, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, + 0x05, 0x44, 0x6f, 0x47, 0x65, 0x74, 0x12, 0x1d, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, + 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x54, + 0x69, 0x63, 0x6b, 0x65, 0x74, 0x1a, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x44, 0x61, 0x74, 0x61, 0x22, 0x00, 0x30, 0x01, 0x12, 0x52, 0x0a, 0x05, + 0x44, 0x6f, 0x50, 0x75, 0x74, 0x12, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x44, 0x61, 0x74, 
0x61, 0x1a, 0x20, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, + 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, + 0x2e, 0x50, 0x75, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x28, 0x01, 0x30, 0x01, + 0x12, 0x58, 0x0a, 0x0a, 0x44, 0x6f, 0x45, 0x78, 0x63, 0x68, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x21, + 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x44, 0x61, 0x74, + 0x61, 0x1a, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, + 0x44, 0x61, 0x74, 0x61, 0x22, 0x00, 0x28, 0x01, 0x30, 0x01, 0x12, 0x4c, 0x0a, 0x08, 0x44, 0x6f, + 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1d, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, + 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x41, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0x1d, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x52, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x30, 0x01, 0x12, 0x52, 0x0a, 0x0b, 0x4c, 0x69, 0x73, 0x74, + 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x1c, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, + 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, + 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x41, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x22, 0x00, 0x30, 0x01, 0x42, 0x71, 0x0a, 0x1c, + 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, + 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x69, 0x6d, 0x70, 0x6c, 0x5a, 0x32, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2f, + 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2f, 0x67, 0x6f, 0x2f, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2f, 0x66, + 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, + 0xaa, 0x02, 0x1c, 0x41, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x41, 0x72, 0x72, 0x6f, 0x77, 0x2e, + 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_Flight_proto_rawDescOnce sync.Once + file_Flight_proto_rawDescData = file_Flight_proto_rawDesc +) func file_Flight_proto_rawDescGZIP() []byte { file_Flight_proto_rawDescOnce.Do(func() { @@ -1658,72 +2351,95 @@ func file_Flight_proto_rawDescGZIP() []byte { return file_Flight_proto_rawDescData } -var file_Flight_proto_enumTypes = make([]protoimpl.EnumInfo, 2) -var file_Flight_proto_msgTypes = make([]protoimpl.MessageInfo, 20) +var file_Flight_proto_enumTypes = make([]protoimpl.EnumInfo, 4) +var file_Flight_proto_msgTypes = make([]protoimpl.MessageInfo, 32) var file_Flight_proto_goTypes = []interface{}{ - (CancelStatus)(0), // 0: arrow.flight.protocol.CancelStatus - (FlightDescriptor_DescriptorType)(0), // 1: arrow.flight.protocol.FlightDescriptor.DescriptorType - (*HandshakeRequest)(nil), // 2: arrow.flight.protocol.HandshakeRequest - (*HandshakeResponse)(nil), // 3: arrow.flight.protocol.HandshakeResponse - (*BasicAuth)(nil), // 4: arrow.flight.protocol.BasicAuth - 
(*Empty)(nil), // 5: arrow.flight.protocol.Empty
- (*ActionType)(nil), // 6: arrow.flight.protocol.ActionType
- (*Criteria)(nil), // 7: arrow.flight.protocol.Criteria
- (*Action)(nil), // 8: arrow.flight.protocol.Action
- (*CancelFlightInfoRequest)(nil), // 9: arrow.flight.protocol.CancelFlightInfoRequest
- (*RenewFlightEndpointRequest)(nil), // 10: arrow.flight.protocol.RenewFlightEndpointRequest
- (*Result)(nil), // 11: arrow.flight.protocol.Result
- (*CancelFlightInfoResult)(nil), // 12: arrow.flight.protocol.CancelFlightInfoResult
- (*SchemaResult)(nil), // 13: arrow.flight.protocol.SchemaResult
- (*FlightDescriptor)(nil), // 14: arrow.flight.protocol.FlightDescriptor
- (*FlightInfo)(nil), // 15: arrow.flight.protocol.FlightInfo
- (*PollInfo)(nil), // 16: arrow.flight.protocol.PollInfo
- (*FlightEndpoint)(nil), // 17: arrow.flight.protocol.FlightEndpoint
- (*Location)(nil), // 18: arrow.flight.protocol.Location
- (*Ticket)(nil), // 19: arrow.flight.protocol.Ticket
- (*FlightData)(nil), // 20: arrow.flight.protocol.FlightData
- (*PutResult)(nil), // 21: arrow.flight.protocol.PutResult
- (*timestamppb.Timestamp)(nil), // 22: google.protobuf.Timestamp
+ (CancelStatus)(0), // 0: arrow.flight.protocol.CancelStatus
+ (FlightDescriptor_DescriptorType)(0), // 1: arrow.flight.protocol.FlightDescriptor.DescriptorType
+ (SetSessionOptionsResult_ErrorValue)(0), // 2: arrow.flight.protocol.SetSessionOptionsResult.ErrorValue
+ (CloseSessionResult_Status)(0), // 3: arrow.flight.protocol.CloseSessionResult.Status
+ (*HandshakeRequest)(nil), // 4: arrow.flight.protocol.HandshakeRequest
+ (*HandshakeResponse)(nil), // 5: arrow.flight.protocol.HandshakeResponse
+ (*BasicAuth)(nil), // 6: arrow.flight.protocol.BasicAuth
+ (*Empty)(nil), // 7: arrow.flight.protocol.Empty
+ (*ActionType)(nil), // 8: arrow.flight.protocol.ActionType
+ (*Criteria)(nil), // 9: arrow.flight.protocol.Criteria
+ (*Action)(nil), // 10: arrow.flight.protocol.Action
+ (*CancelFlightInfoRequest)(nil), // 11: arrow.flight.protocol.CancelFlightInfoRequest
+ (*RenewFlightEndpointRequest)(nil), // 12: arrow.flight.protocol.RenewFlightEndpointRequest
+ (*Result)(nil), // 13: arrow.flight.protocol.Result
+ (*CancelFlightInfoResult)(nil), // 14: arrow.flight.protocol.CancelFlightInfoResult
+ (*SchemaResult)(nil), // 15: arrow.flight.protocol.SchemaResult
+ (*FlightDescriptor)(nil), // 16: arrow.flight.protocol.FlightDescriptor
+ (*FlightInfo)(nil), // 17: arrow.flight.protocol.FlightInfo
+ (*PollInfo)(nil), // 18: arrow.flight.protocol.PollInfo
+ (*FlightEndpoint)(nil), // 19: arrow.flight.protocol.FlightEndpoint
+ (*Location)(nil), // 20: arrow.flight.protocol.Location
+ (*Ticket)(nil), // 21: arrow.flight.protocol.Ticket
+ (*FlightData)(nil), // 22: arrow.flight.protocol.FlightData
+ (*PutResult)(nil), // 23: arrow.flight.protocol.PutResult
+ (*SessionOptionValue)(nil), // 24: arrow.flight.protocol.SessionOptionValue
+ (*SetSessionOptionsRequest)(nil), // 25: arrow.flight.protocol.SetSessionOptionsRequest
+ (*SetSessionOptionsResult)(nil), // 26: arrow.flight.protocol.SetSessionOptionsResult
+ (*GetSessionOptionsRequest)(nil), // 27: arrow.flight.protocol.GetSessionOptionsRequest
+ (*GetSessionOptionsResult)(nil), // 28: arrow.flight.protocol.GetSessionOptionsResult
+ (*CloseSessionRequest)(nil), // 29: arrow.flight.protocol.CloseSessionRequest
+ (*CloseSessionResult)(nil), // 30: arrow.flight.protocol.CloseSessionResult
+ (*SessionOptionValue_StringListValue)(nil), // 31: arrow.flight.protocol.SessionOptionValue.StringListValue
+ nil, // 32: arrow.flight.protocol.SetSessionOptionsRequest.SessionOptionsEntry
+ (*SetSessionOptionsResult_Error)(nil), // 33: arrow.flight.protocol.SetSessionOptionsResult.Error
+ nil, // 34: arrow.flight.protocol.SetSessionOptionsResult.ErrorsEntry
+ nil, // 35: arrow.flight.protocol.GetSessionOptionsResult.SessionOptionsEntry
+ (*timestamppb.Timestamp)(nil), // 36: google.protobuf.Timestamp
}
var file_Flight_proto_depIdxs = []int32{
- 15, // 0: arrow.flight.protocol.CancelFlightInfoRequest.info:type_name -> arrow.flight.protocol.FlightInfo
- 17, // 1: arrow.flight.protocol.RenewFlightEndpointRequest.endpoint:type_name -> arrow.flight.protocol.FlightEndpoint
+ 17, // 0: arrow.flight.protocol.CancelFlightInfoRequest.info:type_name -> arrow.flight.protocol.FlightInfo
+ 19, // 1: arrow.flight.protocol.RenewFlightEndpointRequest.endpoint:type_name -> arrow.flight.protocol.FlightEndpoint
0, // 2: arrow.flight.protocol.CancelFlightInfoResult.status:type_name -> arrow.flight.protocol.CancelStatus
1, // 3: arrow.flight.protocol.FlightDescriptor.type:type_name -> arrow.flight.protocol.FlightDescriptor.DescriptorType
- 14, // 4: arrow.flight.protocol.FlightInfo.flight_descriptor:type_name -> arrow.flight.protocol.FlightDescriptor
- 17, // 5: arrow.flight.protocol.FlightInfo.endpoint:type_name -> arrow.flight.protocol.FlightEndpoint
- 15, // 6: arrow.flight.protocol.PollInfo.info:type_name -> arrow.flight.protocol.FlightInfo
- 14, // 7: arrow.flight.protocol.PollInfo.flight_descriptor:type_name -> arrow.flight.protocol.FlightDescriptor
- 22, // 8: arrow.flight.protocol.PollInfo.expiration_time:type_name -> google.protobuf.Timestamp
- 19, // 9: arrow.flight.protocol.FlightEndpoint.ticket:type_name -> arrow.flight.protocol.Ticket
- 18, // 10: arrow.flight.protocol.FlightEndpoint.location:type_name -> arrow.flight.protocol.Location
- 22, // 11: arrow.flight.protocol.FlightEndpoint.expiration_time:type_name -> google.protobuf.Timestamp
- 14, // 12: arrow.flight.protocol.FlightData.flight_descriptor:type_name -> arrow.flight.protocol.FlightDescriptor
- 2, // 13: arrow.flight.protocol.FlightService.Handshake:input_type -> arrow.flight.protocol.HandshakeRequest
- 7, // 14: arrow.flight.protocol.FlightService.ListFlights:input_type -> arrow.flight.protocol.Criteria
- 14, // 15: arrow.flight.protocol.FlightService.GetFlightInfo:input_type -> arrow.flight.protocol.FlightDescriptor
- 14, // 16: arrow.flight.protocol.FlightService.PollFlightInfo:input_type -> arrow.flight.protocol.FlightDescriptor
- 14, // 17: arrow.flight.protocol.FlightService.GetSchema:input_type -> arrow.flight.protocol.FlightDescriptor
- 19, // 18: arrow.flight.protocol.FlightService.DoGet:input_type -> arrow.flight.protocol.Ticket
- 20, // 19: arrow.flight.protocol.FlightService.DoPut:input_type -> arrow.flight.protocol.FlightData
- 20, // 20: arrow.flight.protocol.FlightService.DoExchange:input_type -> arrow.flight.protocol.FlightData
- 8, // 21: arrow.flight.protocol.FlightService.DoAction:input_type -> arrow.flight.protocol.Action
- 5, // 22: arrow.flight.protocol.FlightService.ListActions:input_type -> arrow.flight.protocol.Empty
- 3, // 23: arrow.flight.protocol.FlightService.Handshake:output_type -> arrow.flight.protocol.HandshakeResponse
- 15, // 24: arrow.flight.protocol.FlightService.ListFlights:output_type -> arrow.flight.protocol.FlightInfo
- 15, // 25: arrow.flight.protocol.FlightService.GetFlightInfo:output_type -> arrow.flight.protocol.FlightInfo
- 16, // 26: arrow.flight.protocol.FlightService.PollFlightInfo:output_type -> arrow.flight.protocol.PollInfo
- 13, // 27: arrow.flight.protocol.FlightService.GetSchema:output_type -> arrow.flight.protocol.SchemaResult
- 20, // 28: arrow.flight.protocol.FlightService.DoGet:output_type -> arrow.flight.protocol.FlightData
- 21, // 29: arrow.flight.protocol.FlightService.DoPut:output_type -> arrow.flight.protocol.PutResult
- 20, // 30: arrow.flight.protocol.FlightService.DoExchange:output_type -> arrow.flight.protocol.FlightData
- 11, // 31: arrow.flight.protocol.FlightService.DoAction:output_type -> arrow.flight.protocol.Result
- 6, // 32: arrow.flight.protocol.FlightService.ListActions:output_type -> arrow.flight.protocol.ActionType
- 23, // [23:33] is the sub-list for method output_type
- 13, // [13:23] is the sub-list for method input_type
- 13, // [13:13] is the sub-list for extension type_name
- 13, // [13:13] is the sub-list for extension extendee
- 0, // [0:13] is the sub-list for field type_name
+ 16, // 4: arrow.flight.protocol.FlightInfo.flight_descriptor:type_name -> arrow.flight.protocol.FlightDescriptor
+ 19, // 5: arrow.flight.protocol.FlightInfo.endpoint:type_name -> arrow.flight.protocol.FlightEndpoint
+ 17, // 6: arrow.flight.protocol.PollInfo.info:type_name -> arrow.flight.protocol.FlightInfo
+ 16, // 7: arrow.flight.protocol.PollInfo.flight_descriptor:type_name -> arrow.flight.protocol.FlightDescriptor
+ 36, // 8: arrow.flight.protocol.PollInfo.expiration_time:type_name -> google.protobuf.Timestamp
+ 21, // 9: arrow.flight.protocol.FlightEndpoint.ticket:type_name -> arrow.flight.protocol.Ticket
+ 20, // 10: arrow.flight.protocol.FlightEndpoint.location:type_name -> arrow.flight.protocol.Location
+ 36, // 11: arrow.flight.protocol.FlightEndpoint.expiration_time:type_name -> google.protobuf.Timestamp
+ 16, // 12: arrow.flight.protocol.FlightData.flight_descriptor:type_name -> arrow.flight.protocol.FlightDescriptor
+ 31, // 13: arrow.flight.protocol.SessionOptionValue.string_list_value:type_name -> arrow.flight.protocol.SessionOptionValue.StringListValue
+ 32, // 14: arrow.flight.protocol.SetSessionOptionsRequest.session_options:type_name -> arrow.flight.protocol.SetSessionOptionsRequest.SessionOptionsEntry
+ 34, // 15: arrow.flight.protocol.SetSessionOptionsResult.errors:type_name -> arrow.flight.protocol.SetSessionOptionsResult.ErrorsEntry
+ 35, // 16: arrow.flight.protocol.GetSessionOptionsResult.session_options:type_name -> arrow.flight.protocol.GetSessionOptionsResult.SessionOptionsEntry
+ 3, // 17: arrow.flight.protocol.CloseSessionResult.status:type_name -> arrow.flight.protocol.CloseSessionResult.Status
+ 24, // 18: arrow.flight.protocol.SetSessionOptionsRequest.SessionOptionsEntry.value:type_name -> arrow.flight.protocol.SessionOptionValue
+ 2, // 19: arrow.flight.protocol.SetSessionOptionsResult.Error.value:type_name -> arrow.flight.protocol.SetSessionOptionsResult.ErrorValue
+ 33, // 20: arrow.flight.protocol.SetSessionOptionsResult.ErrorsEntry.value:type_name -> arrow.flight.protocol.SetSessionOptionsResult.Error
+ 24, // 21: arrow.flight.protocol.GetSessionOptionsResult.SessionOptionsEntry.value:type_name -> arrow.flight.protocol.SessionOptionValue
+ 4, // 22: arrow.flight.protocol.FlightService.Handshake:input_type -> arrow.flight.protocol.HandshakeRequest
+ 9, // 23: arrow.flight.protocol.FlightService.ListFlights:input_type -> arrow.flight.protocol.Criteria
+ 16, // 24: arrow.flight.protocol.FlightService.GetFlightInfo:input_type -> arrow.flight.protocol.FlightDescriptor
+ 16, // 25: arrow.flight.protocol.FlightService.PollFlightInfo:input_type -> arrow.flight.protocol.FlightDescriptor
+ 16, // 26: arrow.flight.protocol.FlightService.GetSchema:input_type -> arrow.flight.protocol.FlightDescriptor
+ 21, // 27: arrow.flight.protocol.FlightService.DoGet:input_type -> arrow.flight.protocol.Ticket
+ 22, // 28: arrow.flight.protocol.FlightService.DoPut:input_type -> arrow.flight.protocol.FlightData
+ 22, // 29: arrow.flight.protocol.FlightService.DoExchange:input_type -> arrow.flight.protocol.FlightData
+ 10, // 30: arrow.flight.protocol.FlightService.DoAction:input_type -> arrow.flight.protocol.Action
+ 7, // 31: arrow.flight.protocol.FlightService.ListActions:input_type -> arrow.flight.protocol.Empty
+ 5, // 32: arrow.flight.protocol.FlightService.Handshake:output_type -> arrow.flight.protocol.HandshakeResponse
+ 17, // 33: arrow.flight.protocol.FlightService.ListFlights:output_type -> arrow.flight.protocol.FlightInfo
+ 17, // 34: arrow.flight.protocol.FlightService.GetFlightInfo:output_type -> arrow.flight.protocol.FlightInfo
+ 18, // 35: arrow.flight.protocol.FlightService.PollFlightInfo:output_type -> arrow.flight.protocol.PollInfo
+ 15, // 36: arrow.flight.protocol.FlightService.GetSchema:output_type -> arrow.flight.protocol.SchemaResult
+ 22, // 37: arrow.flight.protocol.FlightService.DoGet:output_type -> arrow.flight.protocol.FlightData
+ 23, // 38: arrow.flight.protocol.FlightService.DoPut:output_type -> arrow.flight.protocol.PutResult
+ 22, // 39: arrow.flight.protocol.FlightService.DoExchange:output_type -> arrow.flight.protocol.FlightData
+ 13, // 40: arrow.flight.protocol.FlightService.DoAction:output_type -> arrow.flight.protocol.Result
+ 8, // 41: arrow.flight.protocol.FlightService.ListActions:output_type -> arrow.flight.protocol.ActionType
+ 32, // [32:42] is the sub-list for method output_type
+ 22, // [22:32] is the sub-list for method input_type
+ 22, // [22:22] is the sub-list for extension type_name
+ 22, // [22:22] is the sub-list for extension extendee
+ 0, // [0:22] is the sub-list for field type_name
}
func init() { file_Flight_proto_init() }
@@ -1972,15 +2688,130 @@ func file_Flight_proto_init() {
return nil
}
}
+ file_Flight_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*SessionOptionValue); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_Flight_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*SetSessionOptionsRequest); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_Flight_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*SetSessionOptionsResult); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_Flight_proto_msgTypes[23].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*GetSessionOptionsRequest); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_Flight_proto_msgTypes[24].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*GetSessionOptionsResult); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_Flight_proto_msgTypes[25].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*CloseSessionRequest); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_Flight_proto_msgTypes[26].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*CloseSessionResult); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_Flight_proto_msgTypes[27].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*SessionOptionValue_StringListValue); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_Flight_proto_msgTypes[29].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*SetSessionOptionsResult_Error); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
}
file_Flight_proto_msgTypes[14].OneofWrappers = []interface{}{}
+ file_Flight_proto_msgTypes[20].OneofWrappers = []interface{}{
+ (*SessionOptionValue_StringValue)(nil),
+ (*SessionOptionValue_BoolValue)(nil),
+ (*SessionOptionValue_Int64Value)(nil),
+ (*SessionOptionValue_DoubleValue)(nil),
+ (*SessionOptionValue_StringListValue_)(nil),
+ }
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_Flight_proto_rawDesc,
- NumEnums: 2,
- NumMessages: 20,
+ NumEnums: 4,
+ NumMessages: 32,
NumExtensions: 0,
NumServices: 1,
},
diff --git a/go/arrow/flight/gen/flight/FlightSql.pb.go b/go/arrow/flight/gen/flight/FlightSql.pb.go
index 279dc29c4262a..d886bc6bdb70b 100644
--- a/go/arrow/flight/gen/flight/FlightSql.pb.go
+++ b/go/arrow/flight/gen/flight/FlightSql.pb.go
@@ -17,8 +17,8 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
-// protoc-gen-go v1.28.1
-// protoc v4.23.4
+// protoc-gen-go v1.31.0
+// protoc v4.25.2
// source: FlightSql.proto
package flight
@@ -48,33 +48,27 @@ const (
SqlInfo_FLIGHT_SQL_SERVER_VERSION SqlInfo = 1
// Retrieves a UTF-8 string with the Arrow format version of the Flight SQL Server.
SqlInfo_FLIGHT_SQL_SERVER_ARROW_VERSION SqlInfo = 2
- //
// Retrieves a boolean value indicating whether the Flight SQL Server is read only.
//
// Returns:
// - false: if read-write
// - true: if read only
SqlInfo_FLIGHT_SQL_SERVER_READ_ONLY SqlInfo = 3
- //
// Retrieves a boolean value indicating whether the Flight SQL Server supports executing
// SQL queries.
//
// Note that the absence of this info (as opposed to a false value) does not necessarily
// mean that SQL is not supported, as this property was not originally defined.
SqlInfo_FLIGHT_SQL_SERVER_SQL SqlInfo = 4
- //
// Retrieves a boolean value indicating whether the Flight SQL Server supports executing
// Substrait plans.
SqlInfo_FLIGHT_SQL_SERVER_SUBSTRAIT SqlInfo = 5
- //
// Retrieves a string value indicating the minimum supported Substrait version, or null
// if Substrait is not supported.
SqlInfo_FLIGHT_SQL_SERVER_SUBSTRAIT_MIN_VERSION SqlInfo = 6
- //
// Retrieves a string value indicating the maximum supported Substrait version, or null
// if Substrait is not supported.
SqlInfo_FLIGHT_SQL_SERVER_SUBSTRAIT_MAX_VERSION SqlInfo = 7
- //
// Retrieves an int32 indicating whether the Flight SQL Server supports the
// BeginTransaction/EndTransaction/BeginSavepoint/EndSavepoint actions.
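Before the FlightSql.pb.go comment reflow continues below, it is worth seeing how the session-management messages registered above fit together. A minimal sketch, not part of the patch: it assumes the oneof field on SessionOptionValue is named OptionValue and the repeated field on StringListValue is named Values (standard protoc-gen-go naming, but neither is shown in this hunk); the option keys and the v16 import path are likewise illustrative.

package main

import (
	"fmt"

	flight "github.com/apache/arrow/go/v16/arrow/flight/gen/flight" // major-version suffix is an assumption
)

func main() {
	req := &flight.SetSessionOptionsRequest{
		SessionOptions: map[string]*flight.SessionOptionValue{
			// Scalar options use the oneof wrapper types listed in OneofWrappers above.
			"catalog":        {OptionValue: &flight.SessionOptionValue_StringValue{StringValue: "main"}},
			"prefetch_depth": {OptionValue: &flight.SessionOptionValue_Int64Value{Int64Value: 4}},
			// List options go through the nested StringListValue message; note the
			// trailing underscore on the generated wrapper type, which avoids a
			// name clash with the message type itself.
			"search_path": {OptionValue: &flight.SessionOptionValue_StringListValue_{
				StringListValue: &flight.SessionOptionValue_StringListValue{Values: []string{"tpch", "public"}},
			}},
		},
	}
	fmt.Println(len(req.SessionOptions), "options staged")
}

Per-key failures come back in SetSessionOptionsResult.Errors, keyed by option name, each carrying a SetSessionOptionsResult.ErrorValue describing why that option was rejected.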
// @@ -84,61 +78,51 @@ const ( // // The possible values are listed in `SqlSupportedTransaction`. SqlInfo_FLIGHT_SQL_SERVER_TRANSACTION SqlInfo = 8 - // // Retrieves a boolean value indicating whether the Flight SQL Server supports explicit // query cancellation (the CancelQuery action). SqlInfo_FLIGHT_SQL_SERVER_CANCEL SqlInfo = 9 - // // Retrieves an int32 indicating the timeout (in milliseconds) for prepared statement handles. // // If 0, there is no timeout. Servers should reset the timeout when the handle is used in a command. SqlInfo_FLIGHT_SQL_SERVER_STATEMENT_TIMEOUT SqlInfo = 100 - // // Retrieves an int32 indicating the timeout (in milliseconds) for transactions, since transactions are not tied to a connection. // // If 0, there is no timeout. Servers should reset the timeout when the handle is used in a command. SqlInfo_FLIGHT_SQL_SERVER_TRANSACTION_TIMEOUT SqlInfo = 101 - // // Retrieves a boolean value indicating whether the Flight SQL Server supports CREATE and DROP of catalogs. // // Returns: // - false: if it doesn't support CREATE and DROP of catalogs. // - true: if it supports CREATE and DROP of catalogs. SqlInfo_SQL_DDL_CATALOG SqlInfo = 500 - // // Retrieves a boolean value indicating whether the Flight SQL Server supports CREATE and DROP of schemas. // // Returns: // - false: if it doesn't support CREATE and DROP of schemas. // - true: if it supports CREATE and DROP of schemas. SqlInfo_SQL_DDL_SCHEMA SqlInfo = 501 - // // Indicates whether the Flight SQL Server supports CREATE and DROP of tables. // // Returns: // - false: if it doesn't support CREATE and DROP of tables. // - true: if it supports CREATE and DROP of tables. SqlInfo_SQL_DDL_TABLE SqlInfo = 502 - // // Retrieves a int32 ordinal representing the case sensitivity of catalog, table, schema and table names. // // The possible values are listed in `arrow.flight.protocol.sql.SqlSupportedCaseSensitivity`. SqlInfo_SQL_IDENTIFIER_CASE SqlInfo = 503 // Retrieves a UTF-8 string with the supported character(s) used to surround a delimited identifier. SqlInfo_SQL_IDENTIFIER_QUOTE_CHAR SqlInfo = 504 - // // Retrieves a int32 describing the case sensitivity of quoted identifiers. // // The possible values are listed in `arrow.flight.protocol.sql.SqlSupportedCaseSensitivity`. SqlInfo_SQL_QUOTED_IDENTIFIER_CASE SqlInfo = 505 - // // Retrieves a boolean value indicating whether all tables are selectable. // // Returns: // - false: if not all tables are selectable or if none are; // - true: if all tables are selectable. SqlInfo_SQL_ALL_TABLES_ARE_SELECTABLE SqlInfo = 506 - // // Retrieves the null ordering. // // Returns a int32 ordinal for the null ordering being used, as described in @@ -154,18 +138,15 @@ const ( SqlInfo_SQL_SYSTEM_FUNCTIONS SqlInfo = 511 // Retrieves a UTF-8 string list with values of the supported datetime functions. SqlInfo_SQL_DATETIME_FUNCTIONS SqlInfo = 512 - // // Retrieves the UTF-8 string that can be used to escape wildcard characters. // This is the string that can be used to escape '_' or '%' in the catalog search parameters that are a pattern // (and therefore use one of the wildcard characters). // The '_' character represents any single character; the '%' character represents any sequence of zero or more // characters. SqlInfo_SQL_SEARCH_STRING_ESCAPE SqlInfo = 513 - // // Retrieves a UTF-8 string with all the "extra" characters that can be used in unquoted identifier names // (those beyond a-z, A-Z, 0-9 and _). 
SqlInfo_SQL_EXTRA_NAME_CHARACTERS SqlInfo = 514 - // // Retrieves a boolean value indicating whether column aliasing is supported. // If so, the SQL AS clause can be used to provide names for computed columns or to provide alias names for columns // as required. @@ -174,7 +155,6 @@ const ( // - false: if column aliasing is unsupported; // - true: if column aliasing is supported. SqlInfo_SQL_SUPPORTS_COLUMN_ALIASING SqlInfo = 515 - // // Retrieves a boolean value indicating whether concatenations between null and non-null values being // null are supported. // @@ -182,13 +162,11 @@ const ( // - false: if concatenations between null and non-null values being null are unsupported; // - true: if concatenations between null and non-null values being null are supported. SqlInfo_SQL_NULL_PLUS_NULL_IS_NULL SqlInfo = 516 - // // Retrieves a map where the key is the type to convert from and the value is a list with the types to convert to, // indicating the supported conversions. Each key and each item on the list value is a value to a predefined type on // SqlSupportsConvert enum. // The returned map will be: map> SqlInfo_SQL_SUPPORTS_CONVERT SqlInfo = 517 - // // Retrieves a boolean value indicating whether, when table correlation names are supported, // they are restricted to being different from the names of the tables. // @@ -196,7 +174,6 @@ const ( // - false: if table correlation names are unsupported; // - true: if table correlation names are supported. SqlInfo_SQL_SUPPORTS_TABLE_CORRELATION_NAMES SqlInfo = 518 - // // Retrieves a boolean value indicating whether, when table correlation names are supported, // they are restricted to being different from the names of the tables. // @@ -204,14 +181,12 @@ const ( // - false: if different table correlation names are unsupported; // - true: if different table correlation names are supported SqlInfo_SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES SqlInfo = 519 - // // Retrieves a boolean value indicating whether expressions in ORDER BY lists are supported. // // Returns: // - false: if expressions in ORDER BY are unsupported; // - true: if expressions in ORDER BY are supported; SqlInfo_SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY SqlInfo = 520 - // // Retrieves a boolean value indicating whether using a column that is not in the SELECT statement in a GROUP BY // clause is supported. // @@ -219,7 +194,6 @@ const ( // - false: if using a column that is not in the SELECT statement in a GROUP BY clause is unsupported; // - true: if using a column that is not in the SELECT statement in a GROUP BY clause is supported. SqlInfo_SQL_SUPPORTS_ORDER_BY_UNRELATED SqlInfo = 521 - // // Retrieves the supported GROUP BY commands; // // Returns an int32 bitmask value representing the supported commands. @@ -232,21 +206,18 @@ const ( // - return 3 (\b11) => [SQL_GROUP_BY_UNRELATED, SQL_GROUP_BY_BEYOND_SELECT]. // Valid GROUP BY types are described under `arrow.flight.protocol.sql.SqlSupportedGroupBy`. SqlInfo_SQL_SUPPORTED_GROUP_BY SqlInfo = 522 - // // Retrieves a boolean value indicating whether specifying a LIKE escape clause is supported. // // Returns: // - false: if specifying a LIKE escape clause is unsupported; // - true: if specifying a LIKE escape clause is supported. SqlInfo_SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE SqlInfo = 523 - // // Retrieves a boolean value indicating whether columns may be defined as non-nullable. // // Returns: // - false: if columns cannot be defined as non-nullable; // - true: if columns may be defined as non-nullable. 
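The bitmask convention spelled out for SQL_SUPPORTED_GROUP_BY above, and reused by several later SqlInfo values, is easy to misread; a small sketch of the decoding (the function name is illustrative, not part of the generated API):

// supportedOrdinals expands a bitmask-style SqlInfo value into the enum
// ordinals it encodes: bit i set means ordinal i is supported, so a value of
// 3 (0b11) for SQL_SUPPORTED_GROUP_BY means both SQL_GROUP_BY_UNRELATED (0)
// and SQL_GROUP_BY_BEYOND_SELECT (1) are supported.
func supportedOrdinals(bitmask int32) []int {
	var ordinals []int
	for i := 0; i < 32; i++ {
		if bitmask&(1<<i) != 0 {
			ordinals = append(ordinals, i)
		}
	}
	return ordinals
}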
SqlInfo_SQL_SUPPORTS_NON_NULLABLE_COLUMNS SqlInfo = 524 - // // Retrieves the supported SQL grammar level as per the ODBC specification. // // Returns an int32 bitmask value representing the supported SQL grammar level. @@ -263,7 +234,6 @@ const ( // - return 7 (\b111) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]. // Valid SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedSqlGrammar`. SqlInfo_SQL_SUPPORTED_GRAMMAR SqlInfo = 525 - // // Retrieves the supported ANSI92 SQL grammar level. // // Returns an int32 bitmask value representing the supported ANSI92 SQL grammar level. @@ -280,14 +250,12 @@ const ( // - return 7 (\b111) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL]. // Valid ANSI92 SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedAnsi92SqlGrammarLevel`. SqlInfo_SQL_ANSI92_SUPPORTED_LEVEL SqlInfo = 526 - // // Retrieves a boolean value indicating whether the SQL Integrity Enhancement Facility is supported. // // Returns: // - false: if the SQL Integrity Enhancement Facility is supported; // - true: if the SQL Integrity Enhancement Facility is supported. SqlInfo_SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY SqlInfo = 527 - // // Retrieves the support level for SQL OUTER JOINs. // // Returns a int32 ordinal for the SQL ordering being used, as described in @@ -297,17 +265,14 @@ const ( SqlInfo_SQL_SCHEMA_TERM SqlInfo = 529 // Retrieves a UTF-8 string with the preferred term for "procedure". SqlInfo_SQL_PROCEDURE_TERM SqlInfo = 530 - // // Retrieves a UTF-8 string with the preferred term for "catalog". // If a empty string is returned its assumed that the server does NOT supports catalogs. SqlInfo_SQL_CATALOG_TERM SqlInfo = 531 - // // Retrieves a boolean value indicating whether a catalog appears at the start of a fully qualified table name. // // - false: if a catalog does not appear at the start of a fully qualified table name; // - true: if a catalog appears at the start of a fully qualified table name. SqlInfo_SQL_CATALOG_AT_START SqlInfo = 532 - // // Retrieves the supported actions for a SQL schema. // // Returns an int32 bitmask value representing the supported actions for a SQL schema. @@ -324,7 +289,6 @@ const ( // - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. // Valid actions for a SQL schema described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. SqlInfo_SQL_SCHEMAS_SUPPORTED_ACTIONS SqlInfo = 533 - // // Retrieves the supported actions for a SQL schema. // // Returns an int32 bitmask value representing the supported actions for a SQL catalog. @@ -341,7 +305,6 @@ const ( // - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. // Valid actions for a SQL catalog are described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. SqlInfo_SQL_CATALOGS_SUPPORTED_ACTIONS SqlInfo = 534 - // // Retrieves the supported SQL positioned commands. // // Returns an int32 bitmask value representing the supported SQL positioned commands. @@ -354,14 +317,12 @@ const ( // - return 3 (\b11) => [SQL_POSITIONED_DELETE, SQL_POSITIONED_UPDATE]. // Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlSupportedPositionedCommands`. SqlInfo_SQL_SUPPORTED_POSITIONED_COMMANDS SqlInfo = 535 - // // Retrieves a boolean value indicating whether SELECT FOR UPDATE statements are supported. 
// // Returns: // - false: if SELECT FOR UPDATE statements are unsupported; // - true: if SELECT FOR UPDATE statements are supported. SqlInfo_SQL_SELECT_FOR_UPDATE_SUPPORTED SqlInfo = 536 - // // Retrieves a boolean value indicating whether stored procedure calls that use the stored procedure escape syntax // are supported. // @@ -369,7 +330,6 @@ const ( // - false: if stored procedure calls that use the stored procedure escape syntax are unsupported; // - true: if stored procedure calls that use the stored procedure escape syntax are supported. SqlInfo_SQL_STORED_PROCEDURES_SUPPORTED SqlInfo = 537 - // // Retrieves the supported SQL subqueries. // // Returns an int32 bitmask value representing the supported SQL subqueries. @@ -395,14 +355,12 @@ const ( // - ... // Valid SQL subqueries are described under `arrow.flight.protocol.sql.SqlSupportedSubqueries`. SqlInfo_SQL_SUPPORTED_SUBQUERIES SqlInfo = 538 - // // Retrieves a boolean value indicating whether correlated subqueries are supported. // // Returns: // - false: if correlated subqueries are unsupported; // - true: if correlated subqueries are supported. SqlInfo_SQL_CORRELATED_SUBQUERIES_SUPPORTED SqlInfo = 539 - // // Retrieves the supported SQL UNIONs. // // Returns an int32 bitmask value representing the supported SQL UNIONs. @@ -435,7 +393,6 @@ const ( SqlInfo_SQL_MAX_CONNECTIONS SqlInfo = 549 // Retrieves a int64 value the maximum number of characters allowed in a cursor name. SqlInfo_SQL_MAX_CURSOR_NAME_LENGTH SqlInfo = 550 - // // Retrieves a int64 value representing the maximum number of bytes allowed for an index, // including all of the parts of the index. SqlInfo_SQL_MAX_INDEX_LENGTH SqlInfo = 551 @@ -447,17 +404,15 @@ const ( SqlInfo_SQL_MAX_CATALOG_NAME_LENGTH SqlInfo = 554 // Retrieves a int64 value representing the maximum number of bytes allowed in a single row. SqlInfo_SQL_MAX_ROW_SIZE SqlInfo = 555 - // // Retrieves a boolean indicating whether the return value for the JDBC method getMaxRowSize includes the SQL // data types LONGVARCHAR and LONGVARBINARY. // // Returns: - // - false: if return value for the JDBC method getMaxRowSize does - // not include the SQL data types LONGVARCHAR and LONGVARBINARY; - // - true: if return value for the JDBC method getMaxRowSize includes - // the SQL data types LONGVARCHAR and LONGVARBINARY. + // - false: if return value for the JDBC method getMaxRowSize does + // not include the SQL data types LONGVARCHAR and LONGVARBINARY; + // - true: if return value for the JDBC method getMaxRowSize includes + // the SQL data types LONGVARCHAR and LONGVARBINARY. SqlInfo_SQL_MAX_ROW_SIZE_INCLUDES_BLOBS SqlInfo = 556 - // // Retrieves a int64 value representing the maximum number of characters allowed for an SQL statement; // a result of 0 (zero) means that there is no limit or the limit is not known. SqlInfo_SQL_MAX_STATEMENT_LENGTH SqlInfo = 557 @@ -469,13 +424,11 @@ const ( SqlInfo_SQL_MAX_TABLES_IN_SELECT SqlInfo = 560 // Retrieves a int64 value representing the maximum number of characters allowed in a user name. SqlInfo_SQL_MAX_USERNAME_LENGTH SqlInfo = 561 - // // Retrieves this database's default transaction isolation level as described in // `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. // // Returns a int32 ordinal for the SQL transaction isolation level. SqlInfo_SQL_DEFAULT_TRANSACTION_ISOLATION SqlInfo = 562 - // // Retrieves a boolean value indicating whether transactions are supported. 
If not, invoking the method commit is a // noop, and the isolation level is `arrow.flight.protocol.sql.SqlTransactionIsolationLevel.TRANSACTION_NONE`. // @@ -483,7 +436,6 @@ const ( // - false: if transactions are unsupported; // - true: if transactions are supported. SqlInfo_SQL_TRANSACTIONS_SUPPORTED SqlInfo = 563 - // // Retrieves the supported transactions isolation levels. // // Returns an int32 bitmask value representing the supported transactions isolation levels. @@ -510,7 +462,6 @@ const ( // - ... // Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. SqlInfo_SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS SqlInfo = 564 - // // Retrieves a boolean value indicating whether a data definition statement within a transaction forces // the transaction to commit. // @@ -518,14 +469,12 @@ const ( // - false: if a data definition statement within a transaction does not force the transaction to commit; // - true: if a data definition statement within a transaction forces the transaction to commit. SqlInfo_SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT SqlInfo = 565 - // // Retrieves a boolean value indicating whether a data definition statement within a transaction is ignored. // // Returns: // - false: if a data definition statement within a transaction is taken into account; // - true: a data definition statement within a transaction is ignored. SqlInfo_SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED SqlInfo = 566 - // // Retrieves an int32 bitmask value representing the supported result set types. // The returned bitmask should be parsed in order to retrieve the supported result set types. // @@ -542,7 +491,6 @@ const ( // - ... // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetType`. SqlInfo_SQL_SUPPORTED_RESULT_SET_TYPES SqlInfo = 567 - // // Returns an int32 bitmask value concurrency types supported for // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_UNSPECIFIED`. // @@ -557,7 +505,6 @@ const ( // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_UNSPECIFIED SqlInfo = 568 - // // Returns an int32 bitmask value concurrency types supported for // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_FORWARD_ONLY`. // @@ -572,7 +519,6 @@ const ( // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_FORWARD_ONLY SqlInfo = 569 - // // Returns an int32 bitmask value concurrency types supported for // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE`. // @@ -587,7 +533,6 @@ const ( // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. 
SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_SENSITIVE SqlInfo = 570 - // // Returns an int32 bitmask value concurrency types supported for // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE`. // @@ -602,34 +547,29 @@ const ( // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_INSENSITIVE SqlInfo = 571 - // // Retrieves a boolean value indicating whether this database supports batch updates. // // - false: if this database does not support batch updates; // - true: if this database supports batch updates. SqlInfo_SQL_BATCH_UPDATES_SUPPORTED SqlInfo = 572 - // // Retrieves a boolean value indicating whether this database supports savepoints. // // Returns: // - false: if this database does not support savepoints; // - true: if this database supports savepoints. SqlInfo_SQL_SAVEPOINTS_SUPPORTED SqlInfo = 573 - // // Retrieves a boolean value indicating whether named parameters are supported in callable statements. // // Returns: // - false: if named parameters in callable statements are unsupported; // - true: if named parameters in callable statements are supported. SqlInfo_SQL_NAMED_PARAMETERS_SUPPORTED SqlInfo = 574 - // // Retrieves a boolean value indicating whether updates made to a LOB are made on a copy or directly to the LOB. // // Returns: // - false: if updates made to a LOB are made directly to the LOB; // - true: if updates made to a LOB are made on a copy. SqlInfo_SQL_LOCATORS_UPDATE_COPY SqlInfo = 575 - // // Retrieves a boolean value indicating whether invoking user-defined or vendor functions // using the stored procedure escape syntax is supported. // @@ -1702,7 +1642,7 @@ func (SqlSupportsConvert) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{16} } -//* +// * // The JDBC/ODBC-defined type of any object. // All the values here are the same as in the JDBC and ODBC specs. type XdbcDataType int32 @@ -1817,7 +1757,7 @@ func (XdbcDataType) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{17} } -//* +// * // Detailed subtype information for XDBC_TYPE_DATETIME and XDBC_TYPE_INTERVAL. type XdbcDatetimeSubcode int32 @@ -1958,13 +1898,13 @@ func (XdbcDatetimeSubcode) EnumDescriptor() ([]byte, []int) { type Nullable int32 const ( - //* + // * // Indicates that the fields does not allow the use of null values. Nullable_NULLABILITY_NO_NULLS Nullable = 0 - //* + // * // Indicates that the fields allow the use of null values. Nullable_NULLABILITY_NULLABLE Nullable = 1 - //* + // * // Indicates that nullability of the fields cannot be determined. Nullable_NULLABILITY_UNKNOWN Nullable = 2 ) @@ -2013,21 +1953,21 @@ func (Nullable) EnumDescriptor() ([]byte, []int) { type Searchable int32 const ( - //* + // * // Indicates that column cannot be used in a WHERE clause. Searchable_SEARCHABLE_NONE Searchable = 0 - //* + // * // Indicates that the column can be used in a WHERE clause if it is using a // LIKE operator. Searchable_SEARCHABLE_CHAR Searchable = 1 - //* + // * // Indicates that the column can be used In a WHERE clause with any // operator other than LIKE. // - // - Allowed operators: comparison, quantified comparison, BETWEEN, - // DISTINCT, IN, MATCH, and UNIQUE. 
+ // - Allowed operators: comparison, quantified comparison, BETWEEN, + // DISTINCT, IN, MATCH, and UNIQUE. Searchable_SEARCHABLE_BASIC Searchable = 2 - //* + // * // Indicates that the column can be used in a WHERE clause using any operator. Searchable_SEARCHABLE_FULL Searchable = 3 ) @@ -2293,22 +2233,23 @@ func (ActionCancelQueryResult_CancelResult) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{29, 0} } -// // Represents a metadata request. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the metadata request. // // The returned Arrow schema will be: // < -// info_name: uint32 not null, -// value: dense_union< -// string_value: utf8, -// bool_value: bool, -// bigint_value: int64, -// int32_bitmask: int32, -// string_list: list -// int32_to_int32_list_map: map> +// +// info_name: uint32 not null, +// value: dense_union< +// string_value: utf8, +// bool_value: bool, +// bigint_value: int64, +// int32_bitmask: int32, +// string_list: list +// int32_to_int32_list_map: map> +// // > // where there is one row per requested piece of metadata information. type CommandGetSqlInfo struct { @@ -2316,7 +2257,6 @@ type CommandGetSqlInfo struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // Values are modelled after ODBC's SQLGetInfo() function. This information is intended to provide // Flight SQL clients with basic, SQL syntax and SQL functions related information. // More information types can be added in future releases. @@ -2376,61 +2316,62 @@ func (x *CommandGetSqlInfo) GetInfo() []uint32 { return nil } -// // Represents a request to retrieve information about data type supported on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned schema will be: // < -// type_name: utf8 not null (The name of the data type, for example: VARCHAR, INTEGER, etc), -// data_type: int32 not null (The SQL data type), -// column_size: int32 (The maximum size supported by that column. -// In case of exact numeric types, this represents the maximum precision. -// In case of string types, this represents the character length. -// In case of datetime data types, this represents the length in characters of the string representation. -// NULL is returned for data types where column size is not applicable.), -// literal_prefix: utf8 (Character or characters used to prefix a literal, NULL is returned for -// data types where a literal prefix is not applicable.), -// literal_suffix: utf8 (Character or characters used to terminate a literal, -// NULL is returned for data types where a literal suffix is not applicable.), -// create_params: list -// (A list of keywords corresponding to which parameters can be used when creating -// a column for that specific type. -// NULL is returned if there are no parameters for the data type definition.), -// nullable: int32 not null (Shows if the data type accepts a NULL value. 
The possible values can be seen in the -// Nullable enum.), -// case_sensitive: bool not null (Shows if a character data type is case-sensitive in collations and comparisons), -// searchable: int32 not null (Shows how the data type is used in a WHERE clause. The possible values can be seen in the -// Searchable enum.), -// unsigned_attribute: bool (Shows if the data type is unsigned. NULL is returned if the attribute is -// not applicable to the data type or the data type is not numeric.), -// fixed_prec_scale: bool not null (Shows if the data type has predefined fixed precision and scale.), -// auto_increment: bool (Shows if the data type is auto incremental. NULL is returned if the attribute -// is not applicable to the data type or the data type is not numeric.), -// local_type_name: utf8 (Localized version of the data source-dependent name of the data type. NULL -// is returned if a localized name is not supported by the data source), -// minimum_scale: int32 (The minimum scale of the data type on the data source. -// If a data type has a fixed scale, the MINIMUM_SCALE and MAXIMUM_SCALE -// columns both contain this value. NULL is returned if scale is not applicable.), -// maximum_scale: int32 (The maximum scale of the data type on the data source. -// NULL is returned if scale is not applicable.), -// sql_data_type: int32 not null (The value of the SQL DATA TYPE which has the same values -// as data_type value. Except for interval and datetime, which -// uses generic values. More info about those types can be -// obtained through datetime_subcode. The possible values can be seen -// in the XdbcDataType enum.), -// datetime_subcode: int32 (Only used when the SQL DATA TYPE is interval or datetime. It contains -// its sub types. For type different from interval and datetime, this value -// is NULL. The possible values can be seen in the XdbcDatetimeSubcode enum.), -// num_prec_radix: int32 (If the data type is an approximate numeric type, this column contains -// the value 2 to indicate that COLUMN_SIZE specifies a number of bits. For -// exact numeric types, this column contains the value 10 to indicate that -// column size specifies a number of decimal digits. Otherwise, this column is NULL.), -// interval_precision: int32 (If the data type is an interval data type, then this column contains the value -// of the interval leading precision. Otherwise, this column is NULL. This fields -// is only relevant to be used by ODBC). +// +// type_name: utf8 not null (The name of the data type, for example: VARCHAR, INTEGER, etc), +// data_type: int32 not null (The SQL data type), +// column_size: int32 (The maximum size supported by that column. +// In case of exact numeric types, this represents the maximum precision. +// In case of string types, this represents the character length. +// In case of datetime data types, this represents the length in characters of the string representation. +// NULL is returned for data types where column size is not applicable.), +// literal_prefix: utf8 (Character or characters used to prefix a literal, NULL is returned for +// data types where a literal prefix is not applicable.), +// literal_suffix: utf8 (Character or characters used to terminate a literal, +// NULL is returned for data types where a literal suffix is not applicable.), +// create_params: list +// (A list of keywords corresponding to which parameters can be used when creating +// a column for that specific type. 
+// NULL is returned if there are no parameters for the data type definition.), +// nullable: int32 not null (Shows if the data type accepts a NULL value. The possible values can be seen in the +// Nullable enum.), +// case_sensitive: bool not null (Shows if a character data type is case-sensitive in collations and comparisons), +// searchable: int32 not null (Shows how the data type is used in a WHERE clause. The possible values can be seen in the +// Searchable enum.), +// unsigned_attribute: bool (Shows if the data type is unsigned. NULL is returned if the attribute is +// not applicable to the data type or the data type is not numeric.), +// fixed_prec_scale: bool not null (Shows if the data type has predefined fixed precision and scale.), +// auto_increment: bool (Shows if the data type is auto incremental. NULL is returned if the attribute +// is not applicable to the data type or the data type is not numeric.), +// local_type_name: utf8 (Localized version of the data source-dependent name of the data type. NULL +// is returned if a localized name is not supported by the data source), +// minimum_scale: int32 (The minimum scale of the data type on the data source. +// If a data type has a fixed scale, the MINIMUM_SCALE and MAXIMUM_SCALE +// columns both contain this value. NULL is returned if scale is not applicable.), +// maximum_scale: int32 (The maximum scale of the data type on the data source. +// NULL is returned if scale is not applicable.), +// sql_data_type: int32 not null (The value of the SQL DATA TYPE which has the same values +// as data_type value. Except for interval and datetime, which +// uses generic values. More info about those types can be +// obtained through datetime_subcode. The possible values can be seen +// in the XdbcDataType enum.), +// datetime_subcode: int32 (Only used when the SQL DATA TYPE is interval or datetime. It contains +// its sub types. For type different from interval and datetime, this value +// is NULL. The possible values can be seen in the XdbcDatetimeSubcode enum.), +// num_prec_radix: int32 (If the data type is an approximate numeric type, this column contains +// the value 2 to indicate that COLUMN_SIZE specifies a number of bits. For +// exact numeric types, this column contains the value 10 to indicate that +// column size specifies a number of decimal digits. Otherwise, this column is NULL.), +// interval_precision: int32 (If the data type is an interval data type, then this column contains the value +// of the interval leading precision. Otherwise, this column is NULL. This fields +// is only relevant to be used by ODBC). +// // > // The returned data should be ordered by data_type and then by type_name. type CommandGetXdbcTypeInfo struct { @@ -2438,7 +2379,6 @@ type CommandGetXdbcTypeInfo struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // Specifies the data type to search for the info. DataType *int32 `protobuf:"varint,1,opt,name=data_type,json=dataType,proto3,oneof" json:"data_type,omitempty"` } @@ -2482,16 +2422,17 @@ func (x *CommandGetXdbcTypeInfo) GetDataType() int32 { return 0 } -// // Represents a request to retrieve the list of catalogs on a Flight SQL enabled backend. // The definition of a catalog depends on vendor/implementation. It is usually the database itself // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. 
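CommandGetXdbcTypeInfo above takes an optional data_type filter, and its result rows carry nullable and searchable columns that are ordinals of the Nullable and Searchable enums shown earlier. A hedged sketch, assuming the generated package is imported as flight; the ordinal 12 and the helper names are illustrative:

// typeInfoRequest builds the request; leave DataType nil to list every type.
func typeInfoRequest() *flight.CommandGetXdbcTypeInfo {
	dt := int32(12) // an XdbcDataType ordinal to filter on (value is illustrative)
	return &flight.CommandGetXdbcTypeInfo{DataType: &dt}
}

// describeNullability maps a returned nullable column value to readable form
// using the Nullable constants defined above.
func describeNullability(n int32) string {
	switch flight.Nullable(n) {
	case flight.Nullable_NULLABILITY_NO_NULLS:
		return "NOT NULL"
	case flight.Nullable_NULLABILITY_NULLABLE:
		return "nullable"
	default:
		return "unknown"
	}
}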
+// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// catalog_name: utf8 not null +// +// catalog_name: utf8 not null +// // > // The returned data should be ordered by catalog_name. type CommandGetCatalogs struct { @@ -2532,17 +2473,18 @@ func (*CommandGetCatalogs) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{2} } -// // Represents a request to retrieve the list of database schemas on a Flight SQL enabled backend. // The definition of a database schema depends on vendor/implementation. It is usually a collection of tables. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// catalog_name: utf8, -// db_schema_name: utf8 not null +// +// catalog_name: utf8, +// db_schema_name: utf8 not null +// // > // The returned data should be ordered by catalog_name, then db_schema_name. type CommandGetDbSchemas struct { @@ -2550,17 +2492,15 @@ type CommandGetDbSchemas struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // Specifies the Catalog to search for the tables. // An empty string retrieves those without a catalog. // If omitted the catalog name should not be used to narrow the search. Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` - // // Specifies a filter pattern for schemas to search for. // When no db_schema_filter_pattern is provided, the pattern will not be used to narrow the search. // In the pattern string, two special characters can be used to denote matching rules: - // - "%" means to match any substring with 0 or more characters. - // - "_" means to match any one character. + // - "%" means to match any substring with 0 or more characters. + // - "_" means to match any one character. DbSchemaFilterPattern *string `protobuf:"bytes,2,opt,name=db_schema_filter_pattern,json=dbSchemaFilterPattern,proto3,oneof" json:"db_schema_filter_pattern,omitempty"` } @@ -2610,58 +2550,56 @@ func (x *CommandGetDbSchemas) GetDbSchemaFilterPattern() string { return "" } -// // Represents a request to retrieve the list of tables, and optionally their schemas, on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// catalog_name: utf8, -// db_schema_name: utf8, -// table_name: utf8 not null, -// table_type: utf8 not null, -// [optional] table_schema: bytes not null (schema of the table as described in Schema.fbs::Schema, -// it is serialized as an IPC message.) +// +// catalog_name: utf8, +// db_schema_name: utf8, +// table_name: utf8 not null, +// table_type: utf8 not null, +// [optional] table_schema: bytes not null (schema of the table as described in Schema.fbs::Schema, +// it is serialized as an IPC message.) 
+// // > // Fields on table_schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// // The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested. type CommandGetTables struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // Specifies the Catalog to search for the tables. // An empty string retrieves those without a catalog. // If omitted the catalog name should not be used to narrow the search. Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` - // // Specifies a filter pattern for schemas to search for. // When no db_schema_filter_pattern is provided, all schemas matching other filters are searched. // In the pattern string, two special characters can be used to denote matching rules: - // - "%" means to match any substring with 0 or more characters. - // - "_" means to match any one character. + // - "%" means to match any substring with 0 or more characters. + // - "_" means to match any one character. DbSchemaFilterPattern *string `protobuf:"bytes,2,opt,name=db_schema_filter_pattern,json=dbSchemaFilterPattern,proto3,oneof" json:"db_schema_filter_pattern,omitempty"` - // // Specifies a filter pattern for tables to search for. // When no table_name_filter_pattern is provided, all tables matching other filters are searched. // In the pattern string, two special characters can be used to denote matching rules: - // - "%" means to match any substring with 0 or more characters. - // - "_" means to match any one character. + // - "%" means to match any substring with 0 or more characters. + // - "_" means to match any one character. 
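The "%" and "_" rules quoted above govern every *_filter_pattern field in these metadata commands. How a server applies them is implementation-defined, but the semantics translate directly into a regular expression; a sketch under that reading (the helper name is illustrative):

import (
	"regexp"
	"strings"
)

// patternToRegexp converts a Flight SQL filter pattern, where "%" matches any
// run of zero or more characters and "_" matches exactly one character, into
// an anchored regexp; all other characters match literally.
func patternToRegexp(pattern string) (*regexp.Regexp, error) {
	var b strings.Builder
	b.WriteString("^")
	for _, r := range pattern {
		switch r {
		case '%':
			b.WriteString(".*")
		case '_':
			b.WriteString(".")
		default:
			b.WriteString(regexp.QuoteMeta(string(r)))
		}
	}
	b.WriteString("$")
	return regexp.Compile(b.String())
}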
TableNameFilterPattern *string `protobuf:"bytes,3,opt,name=table_name_filter_pattern,json=tableNameFilterPattern,proto3,oneof" json:"table_name_filter_pattern,omitempty"` - // // Specifies a filter of table types which must match. // The table types depend on vendor/implementation. It is usually used to separate tables from views or system tables. // TABLE, VIEW, and SYSTEM TABLE are commonly supported. @@ -2737,17 +2675,18 @@ func (x *CommandGetTables) GetIncludeSchema() bool { return false } -// // Represents a request to retrieve the list of table types on a Flight SQL enabled backend. // The table types depend on vendor/implementation. It is usually used to separate tables from views or system tables. // TABLE, VIEW, and SYSTEM TABLE are commonly supported. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// table_type: utf8 not null +// +// table_type: utf8 not null +// // > // The returned data should be ordered by table_type. type CommandGetTableTypes struct { @@ -2788,20 +2727,21 @@ func (*CommandGetTableTypes) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{5} } -// // Represents a request to retrieve the primary keys of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// catalog_name: utf8, -// db_schema_name: utf8, -// table_name: utf8 not null, -// column_name: utf8 not null, -// key_name: utf8, -// key_sequence: int32 not null +// +// catalog_name: utf8, +// db_schema_name: utf8, +// table_name: utf8 not null, +// column_name: utf8 not null, +// key_name: utf8, +// key_sequence: int32 not null +// // > // The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence. type CommandGetPrimaryKeys struct { @@ -2809,12 +2749,10 @@ type CommandGetPrimaryKeys struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // Specifies the catalog to search for the table. // An empty string retrieves those without a catalog. // If omitted the catalog name should not be used to narrow the search. Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` - // // Specifies the schema to search for the table. // An empty string retrieves those without a schema. // If omitted the schema name should not be used to narrow the search. @@ -2876,28 +2814,29 @@ func (x *CommandGetPrimaryKeys) GetTable() string { return "" } -// // Represents a request to retrieve a description of the foreign key columns that reference the given table's // primary key columns (the foreign keys exported by a table) of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. 
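Putting the CommandGetTables fields above together: a minimal sketch, assuming the generated package is imported as flight. The TableTypes field name is inferred from the "filter of table types" prose (it is not shown in this hunk), and the pattern values are illustrative.

func listTables() *flight.CommandGetTables {
	schemaPat := "public"  // db_schema_filter_pattern
	tablePat := "orders%"  // table_name_filter_pattern
	return &flight.CommandGetTables{
		DbSchemaFilterPattern:  &schemaPat,
		TableNameFilterPattern: &tablePat,
		TableTypes:             []string{"TABLE", "VIEW"}, // assumed field name; excludes system tables
		IncludeSchema:          true,                      // also return the IPC-serialized table_schema column
	}
}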
+// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null +// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null +// // > // The returned data should be ordered by fk_catalog_name, fk_db_schema_name, fk_table_name, fk_key_name, then key_sequence. // update_rule and delete_rule returns a byte that is equivalent to actions declared on UpdateDeleteRules enum. @@ -2906,12 +2845,10 @@ type CommandGetExportedKeys struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // Specifies the catalog to search for the foreign key table. // An empty string retrieves those without a catalog. // If omitted the catalog name should not be used to narrow the search. Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` - // // Specifies the schema to search for the foreign key table. // An empty string retrieves those without a schema. // If omitted the schema name should not be used to narrow the search. @@ -2973,46 +2910,45 @@ func (x *CommandGetExportedKeys) GetTable() string { return "" } -// // Represents a request to retrieve the foreign keys of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null +// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null +// // > // The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence. 
// update_rule and delete_rule returns a byte that is equivalent to actions: -// - 0 = CASCADE -// - 1 = RESTRICT -// - 2 = SET NULL -// - 3 = NO ACTION -// - 4 = SET DEFAULT +// - 0 = CASCADE +// - 1 = RESTRICT +// - 2 = SET NULL +// - 3 = NO ACTION +// - 4 = SET DEFAULT type CommandGetImportedKeys struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // // Specifies the catalog to search for the primary key table. // An empty string retrieves those without a catalog. // If omitted the catalog name should not be used to narrow the search. Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` - // // Specifies the schema to search for the primary key table. // An empty string retrieves those without a schema. // If omitted the schema name should not be used to narrow the search. @@ -3074,66 +3010,67 @@ func (x *CommandGetImportedKeys) GetTable() string { return "" } -// // Represents a request to retrieve a description of the foreign key columns in the given foreign key table that // reference the primary key or the columns representing a unique constraint of the parent table (could be the same // or a different table) on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null +// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null +// // > // The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence. // update_rule and delete_rule returns a byte that is equivalent to actions: -// - 0 = CASCADE -// - 1 = RESTRICT -// - 2 = SET NULL -// - 3 = NO ACTION -// - 4 = SET DEFAULT +// - 0 = CASCADE +// - 1 = RESTRICT +// - 2 = SET NULL +// - 3 = NO ACTION +// - 4 = SET DEFAULT type CommandGetCrossReference struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - //* + // * // The catalog name where the parent table is. // An empty string retrieves those without a catalog. // If omitted the catalog name should not be used to narrow the search. PkCatalog *string `protobuf:"bytes,1,opt,name=pk_catalog,json=pkCatalog,proto3,oneof" json:"pk_catalog,omitempty"` - //* + // * // The Schema name where the parent table is. // An empty string retrieves those without a schema. // If omitted the schema name should not be used to narrow the search. 
PkDbSchema *string `protobuf:"bytes,2,opt,name=pk_db_schema,json=pkDbSchema,proto3,oneof" json:"pk_db_schema,omitempty"` - //* + // * // The parent table name. It cannot be null. PkTable string `protobuf:"bytes,3,opt,name=pk_table,json=pkTable,proto3" json:"pk_table,omitempty"` - //* + // * // The catalog name where the foreign table is. // An empty string retrieves those without a catalog. // If omitted the catalog name should not be used to narrow the search. FkCatalog *string `protobuf:"bytes,4,opt,name=fk_catalog,json=fkCatalog,proto3,oneof" json:"fk_catalog,omitempty"` - //* + // * // The schema name where the foreign table is. // An empty string retrieves those without a schema. // If omitted the schema name should not be used to narrow the search. FkDbSchema *string `protobuf:"bytes,5,opt,name=fk_db_schema,json=fkDbSchema,proto3,oneof" json:"fk_db_schema,omitempty"` - //* + // * // The foreign table name. It cannot be null. FkTable string `protobuf:"bytes,6,opt,name=fk_table,json=fkTable,proto3" json:"fk_table,omitempty"` } @@ -3212,7 +3149,6 @@ func (x *CommandGetCrossReference) GetFkTable() string { return "" } -// // Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend. type ActionCreatePreparedStatementRequest struct { state protoimpl.MessageState @@ -3272,7 +3208,6 @@ func (x *ActionCreatePreparedStatementRequest) GetTransactionId() []byte { return nil } -// // An embedded message describing a Substrait plan to execute. type SubstraitPlan struct { state protoimpl.MessageState @@ -3336,7 +3271,6 @@ func (x *SubstraitPlan) GetVersion() string { return "" } -// // Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend. type ActionCreatePreparedSubstraitPlanRequest struct { state protoimpl.MessageState @@ -3396,7 +3330,6 @@ func (x *ActionCreatePreparedSubstraitPlanRequest) GetTransactionId() []byte { return nil } -// // Wrap the result of a "CreatePreparedStatement" or "CreatePreparedSubstraitPlan" action. // // The resultant PreparedStatement can be closed either: @@ -3412,10 +3345,13 @@ type ActionCreatePreparedStatementResult struct { // Opaque handle for the prepared statement on the server. PreparedStatementHandle []byte `protobuf:"bytes,1,opt,name=prepared_statement_handle,json=preparedStatementHandle,proto3" json:"prepared_statement_handle,omitempty"` // If a result set generating query was provided, dataset_schema contains the - // schema of the dataset as described in Schema.fbs::Schema, it is serialized as an IPC message. + // schema of the result set. It should be an IPC-encapsulated Schema, as described in Schema.fbs. + // For some queries, the schema of the results may depend on the schema of the parameters. The server + // should provide its best guess as to the schema at this point. Clients must not assume that this + // schema, if provided, will be accurate. DatasetSchema []byte `protobuf:"bytes,2,opt,name=dataset_schema,json=datasetSchema,proto3" json:"dataset_schema,omitempty"` // If the query provided contained parameters, parameter_schema contains the - // schema of the expected parameters as described in Schema.fbs::Schema, it is serialized as an IPC message. + // schema of the expected parameters. It should be an IPC-encapsulated Schema, as described in Schema.fbs. 
ParameterSchema []byte `protobuf:"bytes,3,opt,name=parameter_schema,json=parameterSchema,proto3" json:"parameter_schema,omitempty"` } @@ -3472,7 +3408,6 @@ func (x *ActionCreatePreparedStatementResult) GetParameterSchema() []byte { return nil } -// // Request message for the "ClosePreparedStatement" action on a Flight SQL enabled backend. // Closes server resources associated with the prepared statement handle. type ActionClosePreparedStatementRequest struct { @@ -3523,7 +3458,6 @@ func (x *ActionClosePreparedStatementRequest) GetPreparedStatementHandle() []byt return nil } -// // Request message for the "BeginTransaction" action. // Begins a transaction. type ActionBeginTransactionRequest struct { @@ -3564,7 +3498,6 @@ func (*ActionBeginTransactionRequest) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{15} } -// // Request message for the "BeginSavepoint" action. // Creates a savepoint within a transaction. // @@ -3627,7 +3560,6 @@ func (x *ActionBeginSavepointRequest) GetName() string { return "" } -// // The result of a "BeginTransaction" action. // // The transaction can be manipulated with the "EndTransaction" action, or @@ -3683,7 +3615,6 @@ func (x *ActionBeginTransactionResult) GetTransactionId() []byte { return nil } -// // The result of a "BeginSavepoint" action. // // The transaction can be manipulated with the "EndSavepoint" action. @@ -3739,7 +3670,6 @@ func (x *ActionBeginSavepointResult) GetSavepointId() []byte { return nil } -// // Request message for the "EndTransaction" action. // // Commit (COMMIT) or rollback (ROLLBACK) the transaction. @@ -3803,7 +3733,6 @@ func (x *ActionEndTransactionRequest) GetAction() ActionEndTransactionRequest_En return ActionEndTransactionRequest_END_TRANSACTION_UNSPECIFIED } -// // Request message for the "EndSavepoint" action. // // Release (RELEASE) the savepoint or rollback (ROLLBACK) to the @@ -3869,22 +3798,21 @@ func (x *ActionEndSavepointRequest) GetAction() ActionEndSavepointRequest_EndSav return ActionEndSavepointRequest_END_SAVEPOINT_UNSPECIFIED } -// // Represents a SQL query. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - GetFlightInfo: execute the query. +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. 
+// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - GetFlightInfo: execute the query. type CommandStatementQuery struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3942,23 +3870,22 @@ func (x *CommandStatementQuery) GetTransactionId() []byte { return nil } -// // Represents a Substrait plan. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - GetFlightInfo: execute the query. -// - DoPut: execute the query. +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - GetFlightInfo: execute the query. +// - DoPut: execute the query. type CommandStatementSubstraitPlan struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -4016,7 +3943,7 @@ func (x *CommandStatementSubstraitPlan) GetTransactionId() []byte { return nil } -//* +// * // Represents a ticket resulting from GetFlightInfo with a CommandStatementQuery. // This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this. type TicketStatementQuery struct { @@ -4067,23 +3994,38 @@ func (x *TicketStatementQuery) GetStatementHandle() []byte { return nil } -// // Represents an instance of executing a prepared statement. 
Used in the command member of FlightDescriptor for // the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution. -// - GetFlightInfo: execute the prepared statement instance. +// +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// +// If the schema is retrieved after parameter values have been bound with DoPut, then the server should account +// for the parameters when determining the schema. +// +// - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution. +// +// - GetFlightInfo: execute the prepared statement instance. type CommandPreparedStatementQuery struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -4132,7 +4074,6 @@ func (x *CommandPreparedStatementQuery) GetPreparedStatementHandle() []byte { return nil } -// // Represents a SQL update query. Used in the command member of FlightDescriptor // for the RPC call DoPut to cause the server to execute the included SQL update. type CommandStatementUpdate struct { @@ -4192,7 +4133,6 @@ func (x *CommandStatementUpdate) GetTransactionId() []byte { return nil } -// // Represents a SQL update query. Used in the command member of FlightDescriptor // for the RPC call DoPut to cause the server to execute the included // prepared statement handle as an update. @@ -4244,7 +4184,6 @@ func (x *CommandPreparedStatementUpdate) GetPreparedStatementHandle() []byte { return nil } -// // Returned from the RPC call DoPut when a CommandStatementUpdate // CommandPreparedStatementUpdate was in the request, containing // results from the update. 
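As a hedged sketch of how a Go client typically consumes this message: the flightsql client's ExecuteUpdate helper wraps the DoPut exchange with a CommandStatementUpdate and surfaces the record count carried in DoPutUpdateResult. The endpoint address and SQL statement below are placeholders.

package main

import (
	"context"
	"log"

	"github.com/apache/arrow/go/v16/arrow/flight/flightsql"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// "localhost:12345" and the SQL text are placeholders for illustration.
	client, err := flightsql.NewClient("localhost:12345", nil, nil,
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	// ExecuteUpdate issues DoPut with a CommandStatementUpdate payload and
	// decodes the DoPutUpdateResult returned by the server, surfacing just
	// the affected-record count.
	nRecords, err := client.ExecuteUpdate(context.Background(), "DELETE FROM t WHERE id = 1")
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("update affected %d record(s)", nRecords)
}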
@@ -4297,7 +4236,6 @@ func (x *DoPutUpdateResult) GetRecordCount() int64 { return 0 } -// // Request message for the "CancelQuery" action. // // Explicitly cancel a running query. @@ -4314,7 +4252,7 @@ func (x *DoPutUpdateResult) GetRecordCount() int64 { // This command is deprecated since 13.0.0. Use the "CancelFlightInfo" // action with DoAction instead. // -// Deprecated: Do not use. +// Deprecated: Marked as deprecated in FlightSql.proto. type ActionCancelQueryRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -4366,7 +4304,6 @@ func (x *ActionCancelQueryRequest) GetInfo() []byte { return nil } -// // The result of cancelling a query. // // The result should be wrapped in a google.protobuf.Any message. @@ -4374,7 +4311,7 @@ func (x *ActionCancelQueryRequest) GetInfo() []byte { // This command is deprecated since 13.0.0. Use the "CancelFlightInfo" // action with DoAction instead. // -// Deprecated: Do not use. +// Deprecated: Marked as deprecated in FlightSql.proto. type ActionCancelQueryResult struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -4676,7 +4613,7 @@ var file_FlightSql_proto_rawDesc = []byte{ 0x22, 0x35, 0x0a, 0x18, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x69, 0x6e, 0x66, 0x6f, - 0x3a, 0x05, 0x18, 0x01, 0xc0, 0x3e, 0x01, 0x22, 0x87, 0x02, 0x0a, 0x17, 0x41, 0x63, 0x74, 0x69, + 0x3a, 0x05, 0xc0, 0x3e, 0x01, 0x18, 0x01, 0x22, 0x87, 0x02, 0x0a, 0x17, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x57, 0x0a, 0x06, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x3f, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, @@ -4692,7 +4629,7 @@ var file_FlightSql_proto_rawDesc = []byte{ 0x43, 0x45, 0x4c, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x21, 0x0a, 0x1d, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x43, 0x41, 0x4e, - 0x43, 0x45, 0x4c, 0x4c, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x03, 0x3a, 0x05, 0x18, 0x01, 0xc0, 0x3e, + 0x43, 0x45, 0x4c, 0x4c, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x03, 0x3a, 0x05, 0xc0, 0x3e, 0x01, 0x18, 0x01, 0x2a, 0xb7, 0x18, 0x0a, 0x07, 0x53, 0x71, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x1a, 0x0a, 0x16, 0x46, 0x4c, 0x49, 0x47, 0x48, 0x54, 0x5f, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x45, 0x52, 0x56, 0x45, 0x52, 0x5f, 0x4e, 0x41, 0x4d, 0x45, 0x10, 0x00, 0x12, 0x1d, 0x0a, 0x19, 0x46, 0x4c, 0x49, diff --git a/go/arrow/flight/gen/flight/Flight_grpc.pb.go b/go/arrow/flight/gen/flight/Flight_grpc.pb.go index 87d9abc5926eb..237cb1fe2dfb5 100644 --- a/go/arrow/flight/gen/flight/Flight_grpc.pb.go +++ b/go/arrow/flight/gen/flight/Flight_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.2.0 -// - protoc v4.23.4 +// - protoc v4.25.2 // source: Flight.proto package flight @@ -22,13 +22,11 @@ const _ = grpc.SupportPackageIsVersion7 // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. type FlightServiceClient interface { - // // Handshake between client and server. 
Depending on the server, the // handshake may be required to determine the token that should be used for // future operations. Both request and response are streams to allow multiple // round-trips depending on auth mechanism. Handshake(ctx context.Context, opts ...grpc.CallOption) (FlightService_HandshakeClient, error) - // // Get a list of available streams given a particular criteria. Most flight // services will expose one or more streams that are readily available for // retrieval. This api allows listing the streams available for @@ -36,7 +34,6 @@ type FlightServiceClient interface { // the subset of streams that can be listed via this interface. Each flight // service allows its own definition of how to consume criteria. ListFlights(ctx context.Context, in *Criteria, opts ...grpc.CallOption) (FlightService_ListFlightsClient, error) - // // For a given FlightDescriptor, get information about how the flight can be // consumed. This is a useful interface if the consumer of the interface // already can identify the specific flight to consume. This interface can @@ -48,7 +45,6 @@ type FlightServiceClient interface { // available for consumption for the duration defined by the specific flight // service. GetFlightInfo(ctx context.Context, in *FlightDescriptor, opts ...grpc.CallOption) (*FlightInfo, error) - // // For a given FlightDescriptor, start a query and get information // to poll its execution status. This is a useful interface if the // query may be a long-running query. The first PollFlightInfo call @@ -72,19 +68,16 @@ type FlightServiceClient interface { // A client may use the CancelFlightInfo action with // PollInfo.info to cancel the running query. PollFlightInfo(ctx context.Context, in *FlightDescriptor, opts ...grpc.CallOption) (*PollInfo, error) - // // For a given FlightDescriptor, get the Schema as described in Schema.fbs::Schema // This is used when a consumer needs the Schema of flight stream. Similar to // GetFlightInfo this interface may generate a new flight that was not previously // available in ListFlights. GetSchema(ctx context.Context, in *FlightDescriptor, opts ...grpc.CallOption) (*SchemaResult, error) - // // Retrieve a single stream associated with a particular descriptor // associated with the referenced ticket. A Flight can be composed of one or // more streams where each stream can be retrieved using a separate opaque // ticket that the flight service uses for managing a collection of streams. DoGet(ctx context.Context, in *Ticket, opts ...grpc.CallOption) (FlightService_DoGetClient, error) - // // Push a stream to the flight service associated with a particular // flight stream. This allows a client of a flight service to upload a stream // of data. Depending on the particular flight service, a client consumer @@ -92,14 +85,12 @@ type FlightServiceClient interface { // number. In the latter, the service might implement a 'seal' action that // can be applied to a descriptor once all streams are uploaded. DoPut(ctx context.Context, opts ...grpc.CallOption) (FlightService_DoPutClient, error) - // // Open a bidirectional data channel for a given descriptor. This // allows clients to send and receive arbitrary Arrow data and // application-specific metadata in a single logical stream. In // contrast to DoGet/DoPut, this is more suited for clients // offloading computation (rather than storage) to a Flight service. 
DoExchange(ctx context.Context, opts ...grpc.CallOption) (FlightService_DoExchangeClient, error) - // // Flight services can support an arbitrary number of simple actions in // addition to the possible ListFlights, GetFlightInfo, DoGet, DoPut // operations that are potentially available. DoAction allows a flight client @@ -107,7 +98,6 @@ type FlightServiceClient interface { // opaque request and response objects that are specific to the type action // being undertaken. DoAction(ctx context.Context, in *Action, opts ...grpc.CallOption) (FlightService_DoActionClient, error) - // // A flight service exposes all of the available action types that it has // along with descriptions. This allows different flight consumers to // understand the capabilities of the flight service. @@ -374,13 +364,11 @@ func (x *flightServiceListActionsClient) Recv() (*ActionType, error) { // All implementations must embed UnimplementedFlightServiceServer // for forward compatibility type FlightServiceServer interface { - // // Handshake between client and server. Depending on the server, the // handshake may be required to determine the token that should be used for // future operations. Both request and response are streams to allow multiple // round-trips depending on auth mechanism. Handshake(FlightService_HandshakeServer) error - // // Get a list of available streams given a particular criteria. Most flight // services will expose one or more streams that are readily available for // retrieval. This api allows listing the streams available for @@ -388,7 +376,6 @@ type FlightServiceServer interface { // the subset of streams that can be listed via this interface. Each flight // service allows its own definition of how to consume criteria. ListFlights(*Criteria, FlightService_ListFlightsServer) error - // // For a given FlightDescriptor, get information about how the flight can be // consumed. This is a useful interface if the consumer of the interface // already can identify the specific flight to consume. This interface can @@ -400,7 +387,6 @@ type FlightServiceServer interface { // available for consumption for the duration defined by the specific flight // service. GetFlightInfo(context.Context, *FlightDescriptor) (*FlightInfo, error) - // // For a given FlightDescriptor, start a query and get information // to poll its execution status. This is a useful interface if the // query may be a long-running query. The first PollFlightInfo call @@ -424,19 +410,16 @@ type FlightServiceServer interface { // A client may use the CancelFlightInfo action with // PollInfo.info to cancel the running query. PollFlightInfo(context.Context, *FlightDescriptor) (*PollInfo, error) - // // For a given FlightDescriptor, get the Schema as described in Schema.fbs::Schema // This is used when a consumer needs the Schema of flight stream. Similar to // GetFlightInfo this interface may generate a new flight that was not previously // available in ListFlights. GetSchema(context.Context, *FlightDescriptor) (*SchemaResult, error) - // // Retrieve a single stream associated with a particular descriptor // associated with the referenced ticket. A Flight can be composed of one or // more streams where each stream can be retrieved using a separate opaque // ticket that the flight service uses for managing a collection of streams. DoGet(*Ticket, FlightService_DoGetServer) error - // // Push a stream to the flight service associated with a particular // flight stream. This allows a client of a flight service to upload a stream // of data. 
Depending on the particular flight service, a client consumer @@ -444,14 +427,12 @@ type FlightServiceServer interface { // number. In the latter, the service might implement a 'seal' action that // can be applied to a descriptor once all streams are uploaded. DoPut(FlightService_DoPutServer) error - // Open a bidirectional data channel for a given descriptor. This // allows clients to send and receive arbitrary Arrow data and // application-specific metadata in a single logical stream. In // contrast to DoGet/DoPut, this is more suited for clients // offloading computation (rather than storage) to a Flight service. DoExchange(FlightService_DoExchangeServer) error - // Flight services can support an arbitrary number of simple actions in // addition to the possible ListFlights, GetFlightInfo, DoGet, DoPut // operations that are potentially available. DoAction allows a flight client @@ -459,7 +440,6 @@ type FlightServiceServer interface { // opaque request and response objects that are specific to the type action // being undertaken. DoAction(*Action, FlightService_DoActionServer) error - // A flight service exposes all of the available action types that it has // along with descriptions. This allows different flight consumers to // understand the capabilities of the flight service. diff --git a/go/arrow/flight/server.go b/go/arrow/flight/server.go index c70aceabcfe8e..2df52434a2a32 100644 --- a/go/arrow/flight/server.go +++ b/go/arrow/flight/server.go @@ -18,6 +18,7 @@ package flight import ( "context" + "fmt" "net" "os" "os/signal" @@ -54,6 +55,14 @@ type ( Result = flight.Result CancelFlightInfoResult = flight.CancelFlightInfoResult CancelStatus = flight.CancelStatus + SessionOptionValue = flight.SessionOptionValue + SetSessionOptionsRequest = flight.SetSessionOptionsRequest + SetSessionOptionsResult = flight.SetSessionOptionsResult + SetSessionOptionsResultError = flight.SetSessionOptionsResult_Error + GetSessionOptionsRequest = flight.GetSessionOptionsRequest + GetSessionOptionsResult = flight.GetSessionOptionsResult + CloseSessionRequest = flight.CloseSessionRequest + CloseSessionResult = flight.CloseSessionResult Empty = flight.Empty ) @@ -61,8 +70,75 @@ const ( CancelFlightInfoActionType = "CancelFlightInfo" RenewFlightEndpointActionType = "RenewFlightEndpoint" + SetSessionOptionsActionType = "SetSessionOptions" + GetSessionOptionsActionType = "GetSessionOptions" + CloseSessionActionType = "CloseSession" ) +const ( + // The set option error is unknown. Servers should avoid + // using this value. Clients can retry the request. + SetSessionOptionsResultErrorUnspecified = flight.SetSessionOptionsResult_UNSPECIFIED + // The given session option name is invalid. + SetSessionOptionsResultErrorInvalidName = flight.SetSessionOptionsResult_INVALID_NAME + // The session option value or type is invalid. + SetSessionOptionsResultErrorInvalidValue = flight.SetSessionOptionsResult_INVALID_VALUE + // The session option cannot be set. + SetSessionOptionsResultErrorError = flight.SetSessionOptionsResult_ERROR +) + +const ( + // The close session status is unknown. Servers should avoid + // using this value (send a NOT_FOUND error if the requested + // session is not known). Clients can retry the request. + CloseSessionResultUnspecified = flight.CloseSessionResult_UNSPECIFIED + // The session close request is complete.
+ CloseSessionResultClosed = flight.CloseSessionResult_CLOSED + // The session close request is in progress. The client may retry the request. + CloseSessionResultClosing = flight.CloseSessionResult_CLOSING + // The session is not closeable. + CloseSessionResultNotCloseable = flight.CloseSessionResult_NOT_CLOSEABLE +) + +// NewSessionOptionValues returns a map with the same keys as the input map, but with all values converted +// to SessionOptionValues. If any values fail conversion, an error will be returned. +func NewSessionOptionValues(options map[string]any) (map[string]*flight.SessionOptionValue, error) { + sessionOptions := make(map[string]*flight.SessionOptionValue, len(options)) + for key, val := range options { + optval, err := NewSessionOptionValue(val) + if err != nil { + return nil, err + } + sessionOptions[key] = &optval + } + + return sessionOptions, nil +} + +// NewSessionOptionValue takes any value and constructs a SessionOptionValue suitable for setting session values. +// An error will be returned if the value is not one of the types supported by SessionOptionValue. +func NewSessionOptionValue(value any) (flight.SessionOptionValue, error) { + if value == nil { + return flight.SessionOptionValue{}, nil + } + + switch val := value.(type) { + case string: + return flight.SessionOptionValue{OptionValue: &flight.SessionOptionValue_StringValue{StringValue: val}}, nil + case bool: + return flight.SessionOptionValue{OptionValue: &flight.SessionOptionValue_BoolValue{BoolValue: val}}, nil + case int64: + return flight.SessionOptionValue{OptionValue: &flight.SessionOptionValue_Int64Value{Int64Value: val}}, nil + case float64: + return flight.SessionOptionValue{OptionValue: &flight.SessionOptionValue_DoubleValue{DoubleValue: val}}, nil + case []string: + return flight.SessionOptionValue{OptionValue: &flight.SessionOptionValue_StringListValue_{StringListValue: &flight.SessionOptionValue_StringListValue{Values: val}}}, nil + default: + return flight.SessionOptionValue{}, fmt.Errorf("invalid option type %[1]T for value %[1]v", val) + } +} + // Constants for CancelStatus const ( // The cancellation status is unknown. Servers should avoid diff --git a/go/arrow/flight/session/cookies.go b/go/arrow/flight/session/cookies.go new file mode 100644 index 0000000000000..85dc5d10941ab --- /dev/null +++ b/go/arrow/flight/session/cookies.go @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
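Before moving on to the session package itself, a usage sketch for the NewSessionOptionValues helper added above; the option names and values here are arbitrary examples, not part of this change.

package main

import (
	"fmt"
	"log"

	"github.com/apache/arrow/go/v16/arrow/flight"
)

func main() {
	// Values must be a string, bool, int64, float64 or []string; any other
	// type causes NewSessionOptionValues to return an error.
	opts, err := flight.NewSessionOptionValues(map[string]any{
		"catalog":      "main",             // string
		"readonly":     true,               // bool
		"max_rows":     int64(10_000),      // int64
		"sample_rate":  0.25,               // float64
		"search_paths": []string{"a", "b"}, // []string
	})
	if err != nil {
		log.Fatal(err)
	}

	// The resulting map is ready to be sent via the "SetSessionOptions" action.
	req := &flight.SetSessionOptionsRequest{SessionOptions: opts}
	fmt.Println(len(req.GetSessionOptions()))
}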
+ +package session + +import ( + "context" + "fmt" + "net/http" + + "google.golang.org/grpc/metadata" +) + +func GetIncomingCookieByName(ctx context.Context, name string) (http.Cookie, error) { + md, ok := metadata.FromIncomingContext(ctx) + if !ok { + return http.Cookie{}, fmt.Errorf("no metadata found for incoming context") + } + + header := make(http.Header, md.Len()) + for k, v := range md { + for _, val := range v { + header.Add(k, val) + } + } + + cookie, err := (&http.Request{Header: header}).Cookie(name) + if err != nil { + return http.Cookie{}, err + } + + if cookie == nil { + return http.Cookie{}, fmt.Errorf("failed to get cookie with name: %s", name) + } + + return *cookie, nil +} + +func CreateCookieForSession(session ServerSession) (http.Cookie, error) { + var key string + + if session == nil { + return http.Cookie{}, ErrNoSession + } + + switch s := session.(type) { + case *statefulServerSession: + key = StatefulSessionCookieName + case *statelessServerSession: + key = StatelessSessionCookieName + default: + return http.Cookie{}, fmt.Errorf("cannot serialize session of type %T as cookie", s) + } + + // Reuse the std http lib functionality for constructing cookies + cookie, err := (&http.Request{ + Header: http.Header{"Cookie": []string{fmt.Sprintf("%s=%s", key, session.Token())}}, + }).Cookie(key) + if err != nil { + return http.Cookie{}, err + } + if cookie == nil { + return http.Cookie{}, fmt.Errorf("failed to construct cookie for session: %s", session.Token()) + } + + return *cookie, nil +} diff --git a/go/arrow/flight/session/example_session_test.go b/go/arrow/flight/session/example_session_test.go new file mode 100644 index 0000000000000..705a0b792ef97 --- /dev/null +++ b/go/arrow/flight/session/example_session_test.go @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
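A quick sketch of the GetIncomingCookieByName helper added in cookies.go above. The metadata below fabricates what gRPC would normally attach to an incoming server context, and "abc123" is a made-up token.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/apache/arrow/go/v16/arrow/flight/session"
	"google.golang.org/grpc/metadata"
)

func main() {
	// Simulate an incoming server context whose metadata carries a Cookie header.
	md := metadata.Pairs("cookie", session.StatefulSessionCookieName+"=abc123")
	ctx := metadata.NewIncomingContext(context.Background(), md)

	cookie, err := session.GetIncomingCookieByName(ctx, session.StatefulSessionCookieName)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cookie.Value) // prints: abc123
}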
+ +package session_test + +import ( + "log" + + "github.com/apache/arrow/go/v16/arrow/flight" + "github.com/apache/arrow/go/v16/arrow/flight/flightsql" + "github.com/apache/arrow/go/v16/arrow/flight/session" + "github.com/google/uuid" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func Example_defaultMiddleware() { + // Setup server with default session middleware + middleware := session.NewServerSessionMiddleware(nil) + srv := flight.NewServerWithMiddleware([]flight.ServerMiddleware{ + flight.CreateServerMiddleware(middleware), + }) + srv.RegisterFlightService(flightsql.NewFlightServer(&flightsql.BaseServer{})) + srv.Init("localhost:0") + + go srv.Serve() + defer srv.Shutdown() + + // Client will require cookie middleware in order to handle cookie-based server sessions + client, err := flightsql.NewClient( + srv.Addr().String(), + nil, + []flight.ClientMiddleware{ + flight.NewClientCookieMiddleware(), + }, + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + log.Fatal(err) + } + defer client.Close() + +} + +func Example_customStatefulMiddleware() { + // Generate IDs for new sessions using provided function + factory := session.NewSessionFactory(uuid.NewString) + + // Create a SessionStore to persist sessions. + // In-memory store is default; you may provide your own implementation. + store := session.NewSessionStore() + + // Construct the middleware with the custom manager. + manager := session.NewStatefulServerSessionManager(session.WithFactory(factory), session.WithStore(store)) + middleware := session.NewServerSessionMiddleware(manager) + _ = middleware // ... remaining setup is the same as DefaultMiddleware example +} + +func Example_statelessMiddleware() { + // Construct the middleware with the stateless manager. + manager := session.NewStatelessServerSessionManager() + middleware := session.NewServerSessionMiddleware(manager) + _ = middleware // ... remaining setup is the same as DefaultMiddleware example +} diff --git a/go/arrow/flight/session/session.go b/go/arrow/flight/session/session.go new file mode 100644 index 0000000000000..598c393ecea9d --- /dev/null +++ b/go/arrow/flight/session/session.go @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package session provides server middleware and reference implementations for Flight session management. +// +// For more details on the Flight Session Specification, see: +// https://arrow.apache.org/docs/format/FlightSql.html#flight-server-session-management +// +// [NewServerSessionMiddleware] manages sessions using cookies, so any client would need its own +// middleware/support for storing and sending those cookies. 
The cookies may be stateful or stateless: +// +// - [NewStatefulServerSessionManager] implements stateful cookies. +// +// - [NewStatelessServerSessionManager] implements stateless cookies. +// +// See details of either implementation for caveats and recommended usage scenarios. +package session + +import ( + "context" + "errors" + "fmt" + "net/http" + "sync" + + "github.com/apache/arrow/go/v16/arrow/flight" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" +) + +var ErrNoSession error = errors.New("flight: server session not present") + +type sessionMiddlewareKey struct{} + +// NewSessionContext returns a copy of the provided context containing the provided ServerSession +func NewSessionContext(ctx context.Context, session ServerSession) context.Context { + return context.WithValue(ctx, sessionMiddlewareKey{}, session) +} + +// GetSessionFromContext retrieves the ServerSession from the provided context if it exists. +// An error indicates that the session was not found in the context. +func GetSessionFromContext(ctx context.Context) (ServerSession, error) { + session, ok := ctx.Value(sessionMiddlewareKey{}).(ServerSession) + if !ok { + return nil, ErrNoSession + } + return session, nil +} + +// ServerSession is a container for named SessionOptionValues +type ServerSession interface { + // An identifier for the session that the server can use to reconstruct + // the session state on future requests. It is the responsibility of + // each implementation to define the token's semantics. + Token() string + // Get session option value by name, or nil if it does not exist + GetSessionOption(name string) *flight.SessionOptionValue + // Get a copy of the session options + GetSessionOptions() map[string]*flight.SessionOptionValue + // Set session option by name to given value + SetSessionOption(name string, value *flight.SessionOptionValue) + // Idempotently remove name from this session + EraseSessionOption(name string) + // Close the session + Close() error + // Report whether the session has been closed + Closed() bool +} + +// ServerSessionManager handles session lifecycle management +type ServerSessionManager interface { + // Create a new, empty ServerSession + CreateSession(ctx context.Context) (ServerSession, error) + // Get the current ServerSession, if one exists + GetSession(ctx context.Context) (ServerSession, error) + // Cleanup any resources associated with the current ServerSession + CloseSession(session ServerSession) error +} + +// Implementation of common session behavior. Intended to be extended +// by specific session implementations.
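As an aside before the common implementation, a hypothetical handler running behind this middleware might read and update the current session as follows; the "catalog" option name is an invented example, and main stands in for the middleware by building a session-bearing context directly.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/apache/arrow/go/v16/arrow/flight"
	"github.com/apache/arrow/go/v16/arrow/flight/session"
)

// handle sketches an RPC handler body; by the time it runs, StartCall has
// already placed the ServerSession into ctx.
func handle(ctx context.Context) error {
	sess, err := session.GetSessionFromContext(ctx)
	if err != nil {
		return err // ErrNoSession: the middleware is not installed
	}

	// Read an option; nil means it has not been set on this session.
	if current := sess.GetSessionOption("catalog"); current != nil {
		fmt.Println("catalog already set:", current.GetStringValue())
	}

	// Store a value that subsequent calls on the same session will observe.
	val, err := flight.NewSessionOptionValue("main")
	if err != nil {
		return err
	}
	sess.SetSessionOption("catalog", &val)
	return nil
}

func main() {
	ctx := session.NewSessionContext(context.Background(), session.NewStatelessServerSession(nil))
	if err := handle(ctx); err != nil {
		log.Fatal(err)
	}
}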
+type serverSession struct { + closed bool + + options map[string]*flight.SessionOptionValue + mu sync.RWMutex +} + +func (session *serverSession) GetSessionOption(name string) *flight.SessionOptionValue { + session.mu.RLock() + defer session.mu.RUnlock() + value, found := session.options[name] + if !found { + return nil + } + + return value +} + +func (session *serverSession) GetSessionOptions() map[string]*flight.SessionOptionValue { + options := make(map[string]*flight.SessionOptionValue, len(session.options)) + + session.mu.RLock() + defer session.mu.RUnlock() + for k, v := range session.options { + options[k] = proto.Clone(v).(*flight.SessionOptionValue) + } + + return options +} + +func (session *serverSession) SetSessionOption(name string, value *flight.SessionOptionValue) { + if value.GetOptionValue() == nil { + session.EraseSessionOption(name) + return + } + + session.mu.Lock() + defer session.mu.Unlock() + session.options[name] = value +} + +func (session *serverSession) EraseSessionOption(name string) { + session.mu.Lock() + defer session.mu.Unlock() + delete(session.options, name) +} + +func (session *serverSession) Close() error { + session.options = nil + session.closed = true + return nil +} + +func (session *serverSession) Closed() bool { + return session.closed +} + +// NewServerSessionMiddleware creates a new instance of CustomServerMiddleware implementing server session persistence. +// +// The provided manager can be used to customize session implementation/behavior. +// If no manager is provided, a stateful in-memory, goroutine-safe implementation is used. +func NewServerSessionMiddleware(manager ServerSessionManager) *serverSessionMiddleware { + // Default manager + if manager == nil { + manager = NewStatefulServerSessionManager() + } + return &serverSessionMiddleware{manager: manager} +} + +type serverSessionMiddleware struct { + manager ServerSessionManager +} + +// Get the existing session if one is found, otherwise create one. The resulting context will contain +// the session at a well-known key for any internal RPC methods to read/update. +func (middleware *serverSessionMiddleware) StartCall(ctx context.Context) context.Context { + session, err := middleware.manager.GetSession(ctx) + if err == nil { + return NewSessionContext(ctx, session) + } + + if err != ErrNoSession { + panic(err) + } + + session, err = middleware.manager.CreateSession(ctx) + if err != nil { + panic(err) + } + + // TODO(joellubi): Remove this once Java clients support receiving cookies in gRPC trailer. + // Currently, both C++ and Go client cookie middlewares merge the header and trailer when setting cookies. + // Java middleware checks the metadata in the header, but only reads the trailer when there is an error. + // It is far simpler to only set cookies in the trailer, especially for streaming RPC. + sessionCookie, err := CreateCookieForSession(session) + if err != nil { + panic(err) + } + grpc.SetHeader(ctx, metadata.Pairs("Set-Cookie", sessionCookie.String())) + + return NewSessionContext(ctx, session) +} + +// Determine if the session state has changed. If it has, then we need to inform the client +// with a new cookie. The cookie is sent in the gRPC trailer because we would like to +// determine its contents based on the final state of the session at the end of the RPC call.
+func (middleware *serverSessionMiddleware) CallCompleted(ctx context.Context, _ error) { + session, err := middleware.manager.GetSession(ctx) + if err != nil { + panic(fmt.Sprintf("failed to get server session: %s", err)) + } + + sessionCookie, err := CreateCookieForSession(session) + if err != nil { + panic(err) + } + + clientCookie, err := GetIncomingCookieByName(ctx, sessionCookie.Name) + if err == http.ErrNoCookie { + grpc.SetTrailer(ctx, metadata.Pairs("Set-Cookie", sessionCookie.String())) + return + } + + if err != nil { + panic(err) + } + + if session.Closed() { + // Invalidate the client's cookie + clientCookie.MaxAge = -1 + grpc.SetTrailer(ctx, metadata.Pairs("Set-Cookie", clientCookie.String())) + + if err = middleware.manager.CloseSession(session); err != nil { + panic(fmt.Sprintf("failed to close server session: %s", err)) + } + return + } + + if sessionCookie.String() != clientCookie.String() { + grpc.SetTrailer(ctx, metadata.Pairs("Set-Cookie", sessionCookie.String())) + } + + // If the resulting cookie is exactly the same as the + // client's cookie, then there's no need to send it at all. +} diff --git a/go/arrow/flight/session/stateful_session.go b/go/arrow/flight/session/stateful_session.go new file mode 100644 index 0000000000000..5e3d9c72f5e5c --- /dev/null +++ b/go/arrow/flight/session/stateful_session.go @@ -0,0 +1,197 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package session + +import ( + "context" + "fmt" + "net/http" + "sync" + + "github.com/apache/arrow/go/v16/arrow/flight" + "github.com/google/uuid" +) + +const StatefulSessionCookieName string = "arrow_flight_session_id" + +// SessionStore handles persistence of ServerSession instances for +// stateful session implementations. 
+type SessionStore interface { + // Get the session with the provided ID + Get(id string) (ServerSession, error) + // Persist the provided session + Put(session ServerSession) error + // Remove the session with the provided ID + Remove(id string) error +} + +// SessionFactory creates ServerSession instances +type SessionFactory interface { + // Create a new, empty ServerSession + CreateSession() (ServerSession, error) +} + +// NewSessionStore creates a simple in-memory, goroutine-safe SessionStore +func NewSessionStore() *sessionStore { + return &sessionStore{sessions: make(map[string]ServerSession)} +} + +type sessionStore struct { + sessions map[string]ServerSession + mu sync.RWMutex +} + +func (store *sessionStore) Get(id string) (ServerSession, error) { + store.mu.RLock() + defer store.mu.RUnlock() + session, found := store.sessions[id] + if !found { + return nil, fmt.Errorf("session with ID %s not found", id) + } + return session, nil +} + +func (store *sessionStore) Put(session ServerSession) error { + store.mu.Lock() + defer store.mu.Unlock() + store.sessions[session.Token()] = session + return nil +} + +func (store *sessionStore) Remove(id string) error { + store.mu.Lock() + defer store.mu.Unlock() + delete(store.sessions, id) + + return nil +} + +// NewSessionFactory creates a new SessionFactory, producing in-memory, goroutine-safe ServerSessions. +// The provided function MUST produce collision-free identifiers. +func NewSessionFactory(generateID func() string) *sessionFactory { + return &sessionFactory{generateID: generateID} +} + +type sessionFactory struct { + generateID func() string +} + +func (factory *sessionFactory) CreateSession() (ServerSession, error) { + return &statefulServerSession{ + id: factory.generateID(), + serverSession: serverSession{options: make(map[string]*flight.SessionOptionValue)}, + }, nil +} + +type statefulServerSession struct { + serverSession + id string +} + +func (session *statefulServerSession) Token() string { + return session.id +} + +type StatefulSessionManagerOption func(*statefulServerSessionManager) + +// WithFactory specifies the SessionFactory to use for session creation +func WithFactory(factory SessionFactory) StatefulSessionManagerOption { + return func(manager *statefulServerSessionManager) { + manager.factory = factory + } +} + +// WithStore specifies the SessionStore to use for session persistence +func WithStore(store SessionStore) StatefulSessionManagerOption { + return func(manager *statefulServerSessionManager) { + manager.store = store + } +} + +// NewStatefulServerSessionManager creates a new ServerSessionManager. +// +// - If unset via options, the default factory produces sessions with UUIDs. +// - If unset via options, sessions are stored in-memory. 
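As a sketch of the extension point described above, a hypothetical SessionStore decorator can be supplied via WithStore; a Redis- or database-backed store would be wired in the same way. The loggingStore type is invented for illustration.

package main

import (
	"log"

	"github.com/apache/arrow/go/v16/arrow/flight/session"
)

// loggingStore is a hypothetical SessionStore decorator: it delegates to an
// inner store and logs each operation.
type loggingStore struct {
	inner session.SessionStore
}

func (s *loggingStore) Get(id string) (session.ServerSession, error) {
	log.Printf("session get: %s", id)
	return s.inner.Get(id)
}

func (s *loggingStore) Put(sess session.ServerSession) error {
	log.Printf("session put: %s", sess.Token())
	return s.inner.Put(sess)
}

func (s *loggingStore) Remove(id string) error {
	log.Printf("session remove: %s", id)
	return s.inner.Remove(id)
}

func main() {
	store := &loggingStore{inner: session.NewSessionStore()}
	manager := session.NewStatefulServerSessionManager(session.WithStore(store))
	middleware := session.NewServerSessionMiddleware(manager)
	_ = middleware // install via flight.CreateServerMiddleware as in the package examples
}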
+func NewStatefulServerSessionManager(opts ...StatefulSessionManagerOption) *statefulServerSessionManager { + manager := &statefulServerSessionManager{} + for _, opt := range opts { + opt(manager) + } + + // Set defaults if not specified above + if manager.factory == nil { + manager.factory = NewSessionFactory(uuid.NewString) + } + + if manager.store == nil { + manager.store = NewSessionStore() + } + + return manager +} + +type statefulServerSessionManager struct { + factory SessionFactory + store SessionStore +} + +func (manager *statefulServerSessionManager) CreateSession(ctx context.Context) (ServerSession, error) { + session, err := manager.factory.CreateSession() + if err != nil { + return nil, fmt.Errorf("failed to create new session: %w", err) + } + + if err = manager.store.Put(session); err != nil { + return nil, fmt.Errorf("failed to persist new session: %w", err) + } + + return session, nil +} + +func (manager *statefulServerSessionManager) GetSession(ctx context.Context) (ServerSession, error) { + session, err := GetSessionFromContext(ctx) + if err == nil { + return session, nil + } + + sessionID, err := getSessionIDFromIncomingCookie(ctx) + if err == nil { + return manager.store.Get(sessionID) + } + if err == http.ErrNoCookie { + return nil, ErrNoSession + } + + return nil, fmt.Errorf("failed to get current session from cookie: %w", err) +} + +func (manager *statefulServerSessionManager) CloseSession(session ServerSession) error { + if err := manager.store.Remove(session.Token()); err != nil { + return fmt.Errorf("failed to remove server session from store: %w", err) + } + return nil +} + +// Check the provided context for cookies in the incoming gRPC metadata. +func getSessionIDFromIncomingCookie(ctx context.Context) (string, error) { + cookie, err := GetIncomingCookieByName(ctx, StatefulSessionCookieName) + if err != nil { + return "", err + } + + return cookie.Value, nil +} diff --git a/go/arrow/flight/session/stateless_session.go b/go/arrow/flight/session/stateless_session.go new file mode 100644 index 0000000000000..b57d78230a8e8 --- /dev/null +++ b/go/arrow/flight/session/stateless_session.go @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package session + +import ( + "context" + "encoding/base64" + "fmt" + "net/http" + + "github.com/apache/arrow/go/v16/arrow/flight" + "google.golang.org/protobuf/proto" +) + +const StatelessSessionCookieName string = "arrow_flight_session" + +// NewStatelessServerSessionManager creates a new StatelessServerSessionManager. +// +// The tokens it produces contain the entire session state, so sessions can +// be maintained across multiple backends. +// Token contents are considered opaque but are NOT encrypted. 
+func NewStatelessServerSessionManager() *statelessServerSessionManager { + return &statelessServerSessionManager{} +} + +type statelessServerSessionManager struct{} + +func (manager *statelessServerSessionManager) CreateSession(ctx context.Context) (ServerSession, error) { + return NewStatelessServerSession(nil), nil +} + +func (manager *statelessServerSessionManager) GetSession(ctx context.Context) (ServerSession, error) { + session, err := GetSessionFromContext(ctx) + if err == nil { + return session, nil + } + + session, err = getSessionFromIncomingCookie(ctx) + if err == nil { + return session, err + } + if err == http.ErrNoCookie { + return nil, ErrNoSession + } + + return nil, fmt.Errorf("failed to get current session from cookie: %w", err) +} + +func (manager *statelessServerSessionManager) CloseSession(session ServerSession) error { + return nil +} + +// NewStatelessServerSession creates a new instance of a server session that can serialize its entire state. +// A map is provided containing the initial state. If it is nil, a new empty state will be created. +func NewStatelessServerSession(options map[string]*flight.SessionOptionValue) *statelessServerSession { + if options == nil { + options = make(map[string]*flight.SessionOptionValue) + } + + return &statelessServerSession{ + serverSession: serverSession{options: options}, + } +} + +type statelessServerSession struct { + serverSession +} + +// First encode session contents using protobuf binary marshaller. +// Then base64 encode the resulting bytes for client compatibility. +func (session *statelessServerSession) Token() string { + session.mu.RLock() + defer session.mu.RUnlock() + + payload := flight.GetSessionOptionsResult{SessionOptions: session.options} + b, err := proto.Marshal(&payload) + if err != nil { + panic(fmt.Sprintf("failed to marshal stateless token: %s", err)) + } + + return base64.StdEncoding.EncodeToString(b) +} + +// Reconstruct the session from its fully encoded token representation +func decodeStatelessToken(token string) (*statelessServerSession, error) { + decoded, err := base64.StdEncoding.DecodeString(token) + if err != nil { + return nil, err + } + + var parsed flight.GetSessionOptionsResult + if err := proto.Unmarshal(decoded, &parsed); err != nil { + return nil, err + } + + return NewStatelessServerSession(parsed.SessionOptions), nil +} + +// Check the provided context for a cookie in the incoming gRPC metadata containing the +// stateless session token. Decode the token payload to reconstruct the session. 
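To illustrate the stateless design, a hypothetical in-package example (for instance in a _test.go file, since decodeStatelessToken is unexported) showing that the full session state survives the token round trip; "catalog" and "main" are invented values.

package session

import (
	"fmt"
	"log"

	"github.com/apache/arrow/go/v16/arrow/flight"
)

// Example_tokenRoundTrip demonstrates that the entire session state survives
// the Token() / decodeStatelessToken() round trip, which is what lets a
// stateless session resume on any backend.
func Example_tokenRoundTrip() {
	original := NewStatelessServerSession(nil)

	val, err := flight.NewSessionOptionValue("main")
	if err != nil {
		log.Fatal(err)
	}
	original.SetSessionOption("catalog", &val)

	// The token is the base64-encoded protobuf serialization of the option map.
	token := original.Token()

	restored, err := decodeStatelessToken(token)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(restored.GetSessionOption("catalog").GetStringValue())
	// Output: main
}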
+func getSessionFromIncomingCookie(ctx context.Context) (*statelessServerSession, error) { + cookie, err := GetIncomingCookieByName(ctx, StatelessSessionCookieName) + if err != nil { + return nil, err + } + + return decodeStatelessToken(cookie.Value) +} diff --git a/go/arrow/internal/flight_integration/scenario.go b/go/arrow/internal/flight_integration/scenario.go index 91658a694ecab..342727d03cb8d 100644 --- a/go/arrow/internal/flight_integration/scenario.go +++ b/go/arrow/internal/flight_integration/scenario.go @@ -36,6 +36,7 @@ import ( "github.com/apache/arrow/go/v16/arrow/flight" "github.com/apache/arrow/go/v16/arrow/flight/flightsql" "github.com/apache/arrow/go/v16/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v16/arrow/flight/session" "github.com/apache/arrow/go/v16/arrow/internal/arrjson" "github.com/apache/arrow/go/v16/arrow/ipc" "github.com/apache/arrow/go/v16/arrow/memory" @@ -77,6 +78,8 @@ func GetScenario(name string, args ...string) Scenario { return &flightSqlScenarioTester{} case "flight_sql:extension": return &flightSqlExtensionScenarioTester{} + case "session_options": + return &sessionOptionsScenarioTester{} case "": if len(args) > 0 { return &defaultIntegrationTester{path: args[0]} @@ -2635,3 +2638,261 @@ func (m *flightSqlExtensionScenarioTester) ValidateTransactions(client *flightsq return txn.Rollback(ctx) } + +type sessionOptionsScenarioTester struct { + flightsql.BaseServer +} + +func (tester *sessionOptionsScenarioTester) MakeServer(port int) flight.Server { + srv := flight.NewServerWithMiddleware([]flight.ServerMiddleware{ + flight.CreateServerMiddleware(session.NewServerSessionMiddleware(nil)), + }) + + srv.RegisterFlightService(flightsql.NewFlightServer(tester)) + initServer(port, srv) + return srv +} + +func (tester *sessionOptionsScenarioTester) SetSessionOptions(ctx context.Context, req *flight.SetSessionOptionsRequest) (*flight.SetSessionOptionsResult, error) { + session, err := session.GetSessionFromContext(ctx) + if err != nil { + return nil, err + } + + errors := make(map[string]*flight.SetSessionOptionsResultError) + for key, val := range req.GetSessionOptions() { + if key == "lol_invalid" { + errors[key] = &flight.SetSessionOptionsResultError{Value: flight.SetSessionOptionsResultErrorInvalidName} + continue + } + if val.GetStringValue() == "lol_invalid" { + errors[key] = &flight.SetSessionOptionsResultError{Value: flight.SetSessionOptionsResultErrorInvalidValue} + continue + } + + session.SetSessionOption(key, val) + } + + return &flight.SetSessionOptionsResult{Errors: errors}, nil +} + +func (tester *sessionOptionsScenarioTester) GetSessionOptions(ctx context.Context, req *flight.GetSessionOptionsRequest) (*flight.GetSessionOptionsResult, error) { + session, err := session.GetSessionFromContext(ctx) + if err != nil { + return nil, err + } + + return &flight.GetSessionOptionsResult{SessionOptions: session.GetSessionOptions()}, nil +} + +func (tester *sessionOptionsScenarioTester) CloseSession(ctx context.Context, req *flight.CloseSessionRequest) (*flight.CloseSessionResult, error) { + session, err := session.GetSessionFromContext(ctx) + if err != nil { + return nil, err + } + + if err = session.Close(); err != nil { + return nil, err + } + + return &flight.CloseSessionResult{Status: flight.CloseSessionResultClosed}, nil +} + +func (tester *sessionOptionsScenarioTester) RunClient(addr string, opts ...grpc.DialOption) error { + middleware := []flight.ClientMiddleware{ + flight.NewClientCookieMiddleware(), + } + client, err := 
flight.NewClientWithMiddleware(addr, nil, middleware, opts...) + if err != nil { + return err + } + defer client.Close() + + // Run validations in order. We are changing session state in each step, so order is made explicit. + ctx := context.Background() + if err = tester.ValidateFirstGetSessionOptions(ctx, client); err != nil { + return err + } + + if err = tester.ValidateSecondSetSessionOptions(ctx, client); err != nil { + return err + } + + if err = tester.ValidateThirdGetSessionOptions(ctx, client); err != nil { + return err + } + + if err = tester.ValidateFourthRemoveOption(ctx, client); err != nil { + return err + } + + if err = tester.ValidateFifthGetSessionOptions(ctx, client); err != nil { + return err + } + + if err = tester.ValidateSixthCloseSession(ctx, client); err != nil { + return err + } + + // C++ impl currently fails with "Invalid or expired arrow_flight_session_id cookie", likely related to GH-39791 + // if err = tester.ValidateSeventhGetSessionOptions(ctx, client); err != nil { + // return err + // } + + return nil +} + +func (tester *sessionOptionsScenarioTester) ValidateFirstGetSessionOptions(ctx context.Context, client flight.Client) error { + res, err := client.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{}) + if err != nil { + return err + } + + opts := res.GetSessionOptions() + if len(opts) != 0 { + return fmt.Errorf("expected new session to be empty, but found %d options already set", len(opts)) + } + + return nil +} + +func (tester *sessionOptionsScenarioTester) ValidateSecondSetSessionOptions(ctx context.Context, client flight.Client) error { + opts, err := flight.NewSessionOptionValues(map[string]any{ + "foolong": int64(123), + "bardouble": 456.0, + "lol_invalid": "this won't get set", + "key_with_invalid_value": "lol_invalid", + "big_ol_string_list": []string{"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"}, + }) + if err != nil { + return err + } + + res, err := client.SetSessionOptions(ctx, &flight.SetSessionOptionsRequest{SessionOptions: opts}) + if err != nil { + return err + } + + expectedErrs := map[string]*flight.SetSessionOptionsResultError{ + "lol_invalid": {Value: flight.SetSessionOptionsResultErrorInvalidName}, + "key_with_invalid_value": {Value: flight.SetSessionOptionsResultErrorInvalidValue}, + } + + errs := res.GetErrors() + if len(errs) != len(expectedErrs) { + return fmt.Errorf("errors expected: %d, got: %d", len(expectedErrs), len(errs)) + } + + for key, val := range errs { + if !reflect.DeepEqual(val, expectedErrs[key]) { + return fmt.Errorf("error mismatch for key %s. expected: %s, got: %s", key, expectedErrs[key], val) + } + } + + return nil +} + +func (tester *sessionOptionsScenarioTester) ValidateThirdGetSessionOptions(ctx context.Context, client flight.Client) error { + res, err := client.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{}) + if err != nil { + return err + } + + expectedOpts, err := flight.NewSessionOptionValues(map[string]any{ + "foolong": int64(123), + "bardouble": 456.0, + "big_ol_string_list": []string{"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"}, + }) + if err != nil { + return err + } + + opts := res.GetSessionOptions() + if len(opts) != len(expectedOpts) { + return fmt.Errorf("options expected: %d, got: %d", len(expectedOpts), len(opts)) + } + + for key, val := range opts { + if !reflect.DeepEqual(val, expectedOpts[key]) { + return fmt.Errorf("session options mismatch for key %s. 
expected: %s, got: %s", key, expectedOpts[key], val) + } + } + + return nil +} + +func (tester *sessionOptionsScenarioTester) ValidateFourthRemoveOption(ctx context.Context, client flight.Client) error { + opts, err := flight.NewSessionOptionValues(map[string]any{ + "foolong": nil, + }) + if err != nil { + return err + } + + res, err := client.SetSessionOptions(ctx, &flight.SetSessionOptionsRequest{SessionOptions: opts}) + if err != nil { + return err + } + + errs := res.GetErrors() + if len(errs) != 0 { + return fmt.Errorf("errors expected: %d, got: %d", 0, len(errs)) + } + + return nil +} + +func (tester *sessionOptionsScenarioTester) ValidateFifthGetSessionOptions(ctx context.Context, client flight.Client) error { + res, err := client.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{}) + if err != nil { + return err + } + + expectedOpts, err := flight.NewSessionOptionValues(map[string]any{ + "bardouble": 456.0, + "big_ol_string_list": []string{"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"}, + }) + if err != nil { + return err + } + + opts := res.GetSessionOptions() + if len(opts) != len(expectedOpts) { + return fmt.Errorf("options expected: %d, got: %d", len(expectedOpts), len(opts)) + } + + for key, val := range opts { + if !reflect.DeepEqual(val, expectedOpts[key]) { + return fmt.Errorf("session options mismatch for key %s. expected: %s, got: %s", key, expectedOpts[key], val) + } + } + + return nil +} + +func (tester *sessionOptionsScenarioTester) ValidateSixthCloseSession(ctx context.Context, client flight.Client) error { + res, err := client.CloseSession(ctx, &flight.CloseSessionRequest{}) + if err != nil { + return err + } + + if res.GetStatus() != flight.CloseSessionResultClosed { + return fmt.Errorf("expected session to successfully close, but found status: %s", res.GetStatus()) + } + + return nil +} + +func (tester *sessionOptionsScenarioTester) ValidateSeventhGetSessionOptions(ctx context.Context, client flight.Client) error { + res, err := client.GetSessionOptions(ctx, &flight.GetSessionOptionsRequest{}) + if err != nil { + return err + } + + opts := res.GetSessionOptions() + if len(opts) != 0 { + return fmt.Errorf("expected new session to be empty, but found %d options already set", len(opts)) + } + + return nil +} diff --git a/go/arrow/table.go b/go/arrow/table.go index f0728108d94b9..15fd3e5bcf3f9 100644 --- a/go/arrow/table.go +++ b/go/arrow/table.go @@ -39,6 +39,8 @@ type Table interface { Retain() Release() + + fmt.Stringer } // Column is an immutable column data structure consisting of diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 025632c45a56d..2406886222dcb 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -151,7 +151,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.12.1 @@ -166,7 +166,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.12.1 diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsProducer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsProducer.java index f29028547c452..6bd3f8ddf8592 100644 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsProducer.java +++ b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsProducer.java @@ -20,7 +20,6 @@ import java.util.HashMap; import java.util.Map; -import org.apache.arrow.flight.CallStatus; import 
org.apache.arrow.flight.CloseSessionRequest; import org.apache.arrow.flight.CloseSessionResult; import org.apache.arrow.flight.FlightRuntimeException; @@ -84,11 +83,6 @@ public void setSessionOptions(SetSessionOptionsRequest request, CallContext cont public void getSessionOptions(GetSessionOptionsRequest request, CallContext context, StreamListener listener) { ServerSessionMiddleware middleware = context.getMiddleware(sessionMiddlewareKey); - if (!middleware.hasSession()) { - // Attempt to get options without an existing session - listener.onError(CallStatus.NOT_FOUND.withDescription("No current server session").toRuntimeException()); - return; - } final Map sessionOptions = middleware.getSession().getSessionOptions(); listener.onNext(new GetSessionOptionsResult(sessionOptions)); listener.onCompleted(); diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index ce1f52e39676e..020f181f5d107 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -88,8 +88,8 @@ org.hamcrest - hamcrest-core - 1.3 + hamcrest + 2.2 test diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 28534a9b0badd..bf053f3f7798a 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -28,11 +28,11 @@ https://arrow.apache.org - + org.hamcrest - hamcrest-core - 1.3 + hamcrest + 2.2 test diff --git a/java/maven/pom.xml b/java/maven/pom.xml index c2b13119fc440..ccc12f5397fb7 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -322,7 +322,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.12.1 @@ -338,7 +338,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.12.1 diff --git a/java/pom.xml b/java/pom.xml index 69ee8a26d946f..ea8e30bf500bf 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -45,7 +45,7 @@ true 9+181-r4173-1 2.24.0 - 3.11.0 + 3.12.1 5.5.0 5.2.0 3.42.0 @@ -400,7 +400,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.12.1 @@ -603,7 +603,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.12.1 @@ -768,7 +768,7 @@ de.huxhorn.lilith de.huxhorn.lilith.logback.appender.multiplex-classic - 0.9.44 + 8.3.0 test @@ -808,7 +808,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.12.1 diff --git a/java/vector/pom.xml b/java/vector/pom.xml index dc453963b62f6..dde53e7e656bf 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -53,7 +53,7 @@ commons-codec commons-codec - 1.16.0 + 1.16.1 org.apache.arrow diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 54a5b99e058a5..1d6524373a733 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -722,6 +722,9 @@ endif() # Error on any warnings not already explicitly ignored. set(CYTHON_FLAGS "${CYTHON_FLAGS}" "--warning-errors") +# GH-40236: make generated C++ code easier to compile by disabling an +# undocumented Cython feature. +set(CYTHON_FLAGS "${CYTHON_FLAGS}" "--no-c-in-traceback") foreach(module ${CYTHON_EXTENSIONS}) string(REPLACE "." 
";" directories ${module}) diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index b93f71969e8d3..8b9e62d628870 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -3139,6 +3139,13 @@ cdef class FileSystemFactoryOptions(_Weakrefable): self.options.selector_ignore_prefixes = [tobytes(v) for v in values] +cdef vector[CFileInfo] unwrap_finfos(finfos): + cdef vector[CFileInfo] o_vect + for fi in finfos: + o_vect.push_back(( fi).unwrap()) + return o_vect + + cdef class FileSystemDatasetFactory(DatasetFactory): """ Create a DatasetFactory from a list of paths with schema inspection. @@ -3163,6 +3170,7 @@ cdef class FileSystemDatasetFactory(DatasetFactory): FileSystemFactoryOptions options=None): cdef: vector[c_string] paths + vector[CFileInfo] finfos CFileSelector c_selector CResult[shared_ptr[CDatasetFactory]] result shared_ptr[CFileSystem] c_filesystem @@ -3184,14 +3192,24 @@ cdef class FileSystemDatasetFactory(DatasetFactory): c_options ) elif isinstance(paths_or_selector, (list, tuple)): - paths = [tobytes(s) for s in paths_or_selector] - with nogil: - result = CFileSystemDatasetFactory.MakeFromPaths( - c_filesystem, - paths, - c_format, - c_options - ) + if len(paths_or_selector) > 0 and isinstance(paths_or_selector[0], FileInfo): + finfos = unwrap_finfos(paths_or_selector) + with nogil: + result = CFileSystemDatasetFactory.MakeFromFileInfos( + c_filesystem, + finfos, + c_format, + c_options + ) + else: + paths = [tobytes(s) for s in paths_or_selector] + with nogil: + result = CFileSystemDatasetFactory.MakeFromPaths( + c_filesystem, + paths, + c_format, + c_options + ) else: raise TypeError('Must pass either paths or a FileSelector, but ' 'passed {}'.format(type(paths_or_selector))) diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index 67ee7590560f0..8289215de2e29 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -1134,8 +1134,8 @@ cdef class MetadataRecordBatchWriter(_CRecordBatchWriter): ---------- table : Table max_chunksize : int, default None - Maximum size for RecordBatch chunks. Individual chunks may be - smaller depending on the chunk layout of individual columns. + Maximum number of rows for RecordBatch chunks. Individual chunks may + be smaller depending on the chunk layout of individual columns. """ cdef: # max_chunksize must be > 0 to have any impact diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index ad01d45571ba1..7d9b65c77d25a 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1573,7 +1573,7 @@ cdef class Array(_PandasConvertible): # decoding the dictionary will make sure nulls are correctly handled. # Decoding a dictionary does imply a copy by the way, # so it can't be done if the user requested a zero_copy. - c_options.decode_dictionaries = not zero_copy_only + c_options.decode_dictionaries = True c_options.zero_copy_only = zero_copy_only c_options.to_numpy = True @@ -1585,9 +1585,6 @@ cdef class Array(_PandasConvertible): # always convert to numpy array without pandas dependency array = PyObject_to_object(out) - if isinstance(array, dict): - array = np.take(array['dictionary'], array['indices']) - if writable and not array.flags.writeable: # if the conversion already needed to a copy, writeable is True array = array.copy() @@ -1778,6 +1775,70 @@ cdef class Array(_PandasConvertible): return pyarrow_wrap_array(array) + def _export_to_c_device(self, out_ptr, out_schema_ptr=0): + """ + Export to a C ArrowDeviceArray struct, given its pointer. 
+
+        If a C ArrowSchema struct pointer is also given, the array type
+        is exported to it at the same time.
+
+        Parameters
+        ----------
+        out_ptr: int
+            The raw pointer to a C ArrowDeviceArray struct.
+        out_schema_ptr: int (optional)
+            The raw pointer to a C ArrowSchema struct.
+
+        Be careful: if you don't pass the ArrowDeviceArray struct to a consumer,
+        array memory will leak. This is a low-level function intended for
+        expert users.
+        """
+        cdef:
+            void* c_ptr = _as_c_pointer(out_ptr)
+            void* c_schema_ptr = _as_c_pointer(out_schema_ptr,
+                                               allow_null=True)
+        with nogil:
+            check_status(ExportDeviceArray(
+                deref(self.sp_array), <shared_ptr[CSyncEvent]>NULL,
+                <ArrowDeviceArray*> c_ptr, <ArrowSchema*> c_schema_ptr))
+
+    @staticmethod
+    def _import_from_c_device(in_ptr, type):
+        """
+        Import Array from a C ArrowDeviceArray struct, given its pointer
+        and the imported array type.
+
+        Parameters
+        ----------
+        in_ptr: int
+            The raw pointer to a C ArrowDeviceArray struct.
+        type: DataType or int
+            Either a DataType object, or the raw pointer to a C ArrowSchema
+            struct.
+
+        This is a low-level function intended for expert users.
+        """
+        cdef:
+            void* c_ptr = _as_c_pointer(in_ptr)
+            void* c_type_ptr
+            shared_ptr[CArray] c_array
+
+        c_type = pyarrow_unwrap_data_type(type)
+        if c_type == nullptr:
+            # Not a DataType object, perhaps a raw ArrowSchema pointer
+            c_type_ptr = _as_c_pointer(type)
+            with nogil:
+                c_array = GetResultValue(
+                    ImportDeviceArray(<ArrowDeviceArray*> c_ptr,
+                                      <ArrowSchema*> c_type_ptr)
+                )
+        else:
+            with nogil:
+                c_array = GetResultValue(
+                    ImportDeviceArray(<ArrowDeviceArray*> c_ptr, c_type)
+                )
+        return pyarrow_wrap_array(c_array)
+
     def __dlpack__(self, stream=None):
         """Export a primitive array as a DLPack capsule.
diff --git a/python/pyarrow/cffi.py b/python/pyarrow/cffi.py
index 961b61dee59fd..1da1a91691404 100644
--- a/python/pyarrow/cffi.py
+++ b/python/pyarrow/cffi.py
@@ -64,6 +64,16 @@
     // Opaque producer-specific data
     void* private_data;
   };
+
+  typedef int32_t ArrowDeviceType;
+
+  struct ArrowDeviceArray {
+    struct ArrowArray array;
+    int64_t device_id;
+    ArrowDeviceType device_type;
+    void* sync_event;
+    int64_t reserved[3];
+  };
 """
 
 # TODO use out-of-line mode for faster import and avoid C parsing
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index f83753ac57d47..1efbfe1665a75 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -456,11 +456,22 @@ def _filesystem_dataset(source, schema=None, filesystem=None,
     -------
     FileSystemDataset
     """
+    from pyarrow.fs import LocalFileSystem, _ensure_filesystem, FileInfo
+
     format = _ensure_format(format or 'parquet')
     partitioning = _ensure_partitioning(partitioning)
 
     if isinstance(source, (list, tuple)):
-        fs, paths_or_selector = _ensure_multiple_sources(source, filesystem)
+        if source and isinstance(source[0], FileInfo):
+            if filesystem is None:
+                # fall back to local file system as the default
+                fs = LocalFileSystem()
+            else:
+                # construct a filesystem if it is a valid URI
+                fs = _ensure_filesystem(filesystem)
+            paths_or_selector = source
+        else:
+            fs, paths_or_selector = _ensure_multiple_sources(source, filesystem)
     else:
         fs, paths_or_selector = _ensure_single_source(source, filesystem)
 
@@ -767,6 +778,7 @@ def dataset(source, schema=None, format=None, filesystem=None,
     ...     dataset("local/path/to/data", format="ipc")
     ...
]) # doctest: +SKIP """ + from pyarrow.fs import FileInfo # collect the keyword arguments for later reuse kwargs = dict( schema=schema, @@ -781,7 +793,7 @@ def dataset(source, schema=None, format=None, filesystem=None, if _is_path_like(source): return _filesystem_dataset(source, **kwargs) elif isinstance(source, (tuple, list)): - if all(_is_path_like(elem) for elem in source): + if all(_is_path_like(elem) or isinstance(elem, FileInfo) for elem in source): return _filesystem_dataset(source, **kwargs) elif all(isinstance(elem, Dataset) for elem in source): return _union_dataset(source, **kwargs) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 935fb4d34b318..bc9d05ddbbc37 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -245,6 +245,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CFixedWidthType" arrow::FixedWidthType"(CDataType): int bit_width() + int byte_width() cdef cppclass CNullArray" arrow::NullArray"(CArray): CNullArray(int64_t length) @@ -345,6 +346,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: CResult[unique_ptr[CResizableBuffer]] AllocateResizableBuffer( const int64_t size, CMemoryPool* pool) + cdef cppclass CSyncEvent" arrow::Device::SyncEvent": + pass + + cdef cppclass CDevice" arrow::Device": + pass + cdef CMemoryPool* c_default_memory_pool" arrow::default_memory_pool"() cdef CMemoryPool* c_system_memory_pool" arrow::system_memory_pool"() cdef CStatus c_jemalloc_memory_pool" arrow::jemalloc_memory_pool"( @@ -2901,6 +2908,9 @@ cdef extern from "arrow/c/abi.h": cdef struct ArrowArrayStream: void (*release)(ArrowArrayStream*) noexcept nogil + cdef struct ArrowDeviceArray: + pass + cdef extern from "arrow/c/bridge.h" namespace "arrow" nogil: CStatus ExportType(CDataType&, ArrowSchema* out) CResult[shared_ptr[CDataType]] ImportType(ArrowSchema*) @@ -2933,6 +2943,20 @@ cdef extern from "arrow/c/bridge.h" namespace "arrow" nogil: CStatus ExportChunkedArray(shared_ptr[CChunkedArray], ArrowArrayStream*) CResult[shared_ptr[CChunkedArray]] ImportChunkedArray(ArrowArrayStream*) + CStatus ExportDeviceArray(const CArray&, shared_ptr[CSyncEvent], + ArrowDeviceArray* out, ArrowSchema*) + CResult[shared_ptr[CArray]] ImportDeviceArray( + ArrowDeviceArray*, shared_ptr[CDataType]) + CResult[shared_ptr[CArray]] ImportDeviceArray( + ArrowDeviceArray*, ArrowSchema*) + + CStatus ExportDeviceRecordBatch(const CRecordBatch&, shared_ptr[CSyncEvent], + ArrowDeviceArray* out, ArrowSchema*) + CResult[shared_ptr[CRecordBatch]] ImportDeviceRecordBatch( + ArrowDeviceArray*, shared_ptr[CSchema]) + CResult[shared_ptr[CRecordBatch]] ImportDeviceRecordBatch( + ArrowDeviceArray*, ArrowSchema*) + cdef extern from "arrow/util/byte_size.h" namespace "arrow::util" nogil: CResult[int64_t] ReferencedBufferSize(const CArray& array_data) diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd index 4566cb5004add..fe96705a54b2f 100644 --- a/python/pyarrow/includes/libarrow_dataset.pxd +++ b/python/pyarrow/includes/libarrow_dataset.pxd @@ -403,3 +403,11 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: shared_ptr[CFileFormat] format, CFileSystemFactoryOptions options ) + + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromFileInfos "Make"( + shared_ptr[CFileSystem] filesystem, + vector[CFileInfo] files, + shared_ptr[CFileFormat] format, + CFileSystemFactoryOptions options + ) diff --git 
a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 906f0b7d28e59..136d6bc8b14cd 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -283,6 +283,14 @@ cdef extern from "arrow/python/ipc.h" namespace "arrow::py": object) +cdef extern from "arrow/python/ipc.h" namespace "arrow::py" nogil: + cdef cppclass CCastingRecordBatchReader" arrow::py::CastingRecordBatchReader" \ + (CRecordBatchReader): + @staticmethod + CResult[shared_ptr[CRecordBatchReader]] Make(shared_ptr[CRecordBatchReader], + shared_ptr[CSchema]) + + cdef extern from "arrow/python/extension_type.h" namespace "arrow::py": cdef cppclass CPyExtensionType \ " arrow::py::PyExtensionType"(CExtensionType): diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi index da9636dfc86e1..617e25a14235d 100644 --- a/python/pyarrow/ipc.pxi +++ b/python/pyarrow/ipc.pxi @@ -515,8 +515,8 @@ cdef class _CRecordBatchWriter(_Weakrefable): ---------- table : Table max_chunksize : int, default None - Maximum size for RecordBatch chunks. Individual chunks may be - smaller depending on the chunk layout of individual columns. + Maximum number of rows for RecordBatch chunks. Individual chunks may + be smaller depending on the chunk layout of individual columns. """ cdef: # max_chunksize must be > 0 to have any impact @@ -772,6 +772,38 @@ cdef class RecordBatchReader(_Weakrefable): def __exit__(self, exc_type, exc_val, exc_tb): self.close() + def cast(self, target_schema): + """ + Wrap this reader with one that casts each batch lazily as it is pulled. + Currently only a safe cast to target_schema is implemented. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + + Returns + ------- + RecordBatchReader + """ + cdef: + shared_ptr[CSchema] c_schema + shared_ptr[CRecordBatchReader] c_reader + RecordBatchReader out + + if self.schema.names != target_schema.names: + raise ValueError("Target schema's field names are not matching " + f"the table's field names: {self.schema.names}, " + f"{target_schema.names}") + + c_schema = pyarrow_unwrap_schema(target_schema) + c_reader = GetResultValue(CCastingRecordBatchReader.Make( + self.reader, c_schema)) + + out = RecordBatchReader.__new__(RecordBatchReader) + out.reader = c_reader + return out + def _export_to_c(self, out_ptr): """ Export to a C ArrowArrayStream struct, given its pointer. @@ -827,8 +859,6 @@ cdef class RecordBatchReader(_Weakrefable): The schema to which the stream should be casted, passed as a PyCapsule containing a C ArrowSchema representation of the requested schema. - Currently, this is not supported and will raise a - NotImplementedError if the schema doesn't match the current schema. Returns ------- @@ -840,11 +870,8 @@ cdef class RecordBatchReader(_Weakrefable): if requested_schema is not None: out_schema = Schema._import_from_c_capsule(requested_schema) - # TODO: figure out a way to check if one schema is castable to - # another. Once we have that, we can perform validation here and - # if successful creating a wrapping reader that casts each batch. 
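+        # cast() (defined above) provides the wrapping reader: a schema that
+        # differs but is castable is now handled lazily instead of raising.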
         if self.schema != out_schema:
-            raise NotImplementedError("Casting to requested_schema")
+            return self.cast(out_schema).__arrow_c_stream__()
 
         stream_capsule = alloc_c_stream(&c_stream)
diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
index 2115cd8015cac..cb9cbe5b930e7 100644
--- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
+++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
@@ -2515,6 +2515,8 @@ Status ConvertChunkedArrayToPandas(const PandasOptions& options,
                                    std::shared_ptr<ChunkedArray> arr, PyObject* py_ref,
                                    PyObject** out) {
   if (options.decode_dictionaries && arr->type()->id() == Type::DICTIONARY) {
+    // XXX we should return an error as below if options.zero_copy_only
+    // is true, but that would break compatibility with existing tests.
     const auto& dense_type =
         checked_cast<const DictionaryType&>(*arr->type()).value_type();
     RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &arr));
diff --git a/python/pyarrow/src/arrow/python/extension_type.cc b/python/pyarrow/src/arrow/python/extension_type.cc
index 3ccc171c8713a..be66b4a1c68ea 100644
--- a/python/pyarrow/src/arrow/python/extension_type.cc
+++ b/python/pyarrow/src/arrow/python/extension_type.cc
@@ -72,7 +72,7 @@ PyObject* DeserializeExtInstance(PyObject* type_class,
 
 static const char* kExtensionName = "arrow.py_extension_type";
 
-std::string PyExtensionType::ToString() const {
+std::string PyExtensionType::ToString(bool show_metadata) const {
   PyAcquireGIL lock;
 
   std::stringstream ss;
diff --git a/python/pyarrow/src/arrow/python/extension_type.h b/python/pyarrow/src/arrow/python/extension_type.h
index e433d9aca7081..e6523824eb963 100644
--- a/python/pyarrow/src/arrow/python/extension_type.h
+++ b/python/pyarrow/src/arrow/python/extension_type.h
@@ -33,7 +33,7 @@ class ARROW_PYTHON_EXPORT PyExtensionType : public ExtensionType {
   // Implement extensionType API
   std::string extension_name() const override { return extension_name_; }
 
-  std::string ToString() const override;
+  std::string ToString(bool show_metadata = false) const override;
 
   bool ExtensionEquals(const ExtensionType& other) const override;
diff --git a/python/pyarrow/src/arrow/python/ipc.cc b/python/pyarrow/src/arrow/python/ipc.cc
index 93481822475db..0ed152242425d 100644
--- a/python/pyarrow/src/arrow/python/ipc.cc
+++ b/python/pyarrow/src/arrow/python/ipc.cc
@@ -19,6 +19,7 @@
 
 #include <memory>
 
+#include "arrow/compute/cast.h"
 #include "arrow/python/pyarrow.h"
 
 namespace arrow {
@@ -63,5 +64,70 @@ Result<std::shared_ptr<RecordBatchReader>> PyRecordBatchReader::Make(
   return reader;
 }
 
+CastingRecordBatchReader::CastingRecordBatchReader() = default;
+
+Status CastingRecordBatchReader::Init(std::shared_ptr<RecordBatchReader> parent,
+                                      std::shared_ptr<Schema> schema) {
+  std::shared_ptr<Schema> src = parent->schema();
+
+  // The check for names has already been done in Python where it's easier to
+  // generate a nice error message.
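+  // Validating CanCast for every field here means an impossible cast fails at
+  // construction time rather than midway through reading the stream.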
+  int num_fields = schema->num_fields();
+  if (src->num_fields() != num_fields) {
+    return Status::Invalid("Number of fields not equal");
+  }
+
+  // Ensure all columns can be cast before succeeding
+  for (int i = 0; i < num_fields; i++) {
+    if (!compute::CanCast(*src->field(i)->type(), *schema->field(i)->type())) {
+      return Status::TypeError("Field ", i, " cannot be cast from ",
+                               src->field(i)->type()->ToString(), " to ",
+                               schema->field(i)->type()->ToString());
+    }
+  }
+
+  parent_ = std::move(parent);
+  schema_ = std::move(schema);
+
+  return Status::OK();
+}
+
+std::shared_ptr<Schema> CastingRecordBatchReader::schema() const { return schema_; }
+
+Status CastingRecordBatchReader::ReadNext(std::shared_ptr<RecordBatch>* batch) {
+  std::shared_ptr<RecordBatch> out;
+  ARROW_RETURN_NOT_OK(parent_->ReadNext(&out));
+  if (!out) {
+    batch->reset();
+    return Status::OK();
+  }
+
+  auto num_columns = out->num_columns();
+  auto options = compute::CastOptions::Safe();
+  ArrayVector columns(num_columns);
+  for (int i = 0; i < num_columns; i++) {
+    const Array& src = *out->column(i);
+    if (!schema_->field(i)->nullable() && src.null_count() > 0) {
+      return Status::Invalid(
+          "Can't cast array that contains nulls to non-nullable field at index ", i);
+    }
+
+    ARROW_ASSIGN_OR_RAISE(columns[i],
+                          compute::Cast(src, schema_->field(i)->type(), options));
+  }
+
+  *batch = RecordBatch::Make(schema_, out->num_rows(), std::move(columns));
+  return Status::OK();
+}
+
+Result<std::shared_ptr<RecordBatchReader>> CastingRecordBatchReader::Make(
+    std::shared_ptr<RecordBatchReader> parent, std::shared_ptr<Schema> schema) {
+  auto reader = std::shared_ptr<CastingRecordBatchReader>(new CastingRecordBatchReader());
+  ARROW_RETURN_NOT_OK(reader->Init(parent, schema));
+  return reader;
+}
+
+Status CastingRecordBatchReader::Close() { return parent_->Close(); }
+
 }  // namespace py
 }  // namespace arrow
diff --git a/python/pyarrow/src/arrow/python/ipc.h b/python/pyarrow/src/arrow/python/ipc.h
index 92232ed830093..2c16d8c967ff0 100644
--- a/python/pyarrow/src/arrow/python/ipc.h
+++ b/python/pyarrow/src/arrow/python/ipc.h
@@ -48,5 +48,25 @@ class ARROW_PYTHON_EXPORT PyRecordBatchReader : public RecordBatchReader {
   OwnedRefNoGIL iterator_;
 };
 
+class ARROW_PYTHON_EXPORT CastingRecordBatchReader : public RecordBatchReader {
+ public:
+  std::shared_ptr<Schema> schema() const override;
+
+  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override;
+
+  static Result<std::shared_ptr<RecordBatchReader>> Make(
+      std::shared_ptr<RecordBatchReader> parent, std::shared_ptr<Schema> schema);
+
+  Status Close() override;
+
+ protected:
+  CastingRecordBatchReader();
+
+  Status Init(std::shared_ptr<RecordBatchReader> parent, std::shared_ptr<Schema> schema);
+
+  std::shared_ptr<RecordBatchReader> parent_;
+  std::shared_ptr<Schema> schema_;
+};
+
 }  // namespace py
 }  // namespace arrow
diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index 3c4d59d6594a2..a0bae2f50194d 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -581,7 +581,8 @@ struct PyConverterTrait<
 };
 
 template <typename T>
-struct PyConverterTrait<T, enable_if_t<is_list_like_type<T>::value>> {
+struct PyConverterTrait<
+    T, enable_if_t<is_list_like_type<T>::value || is_list_view_type<T>::value>> {
   using type = PyListConverter<T>;
 };
 
@@ -803,7 +804,6 @@ class PyListConverter : public ListConverter {
       return this->list_builder_->AppendNull();
     }
 
-    RETURN_NOT_OK(this->list_builder_->Append());
     if (PyArray_Check(value)) {
       RETURN_NOT_OK(AppendNdarray(value));
     } else if (PySequence_Check(value)) {
@@ -824,6 +824,21 @@
   }
 
  protected:
+  // MapType does not support args in the Append() method
+  Status AppendTo(const MapType*, int64_t size) { return this->list_builder_->Append(); }
+
+  // FixedSizeListType does not support args in the Append() method
+  Status AppendTo(const FixedSizeListType*, int64_t size) {
+    return this->list_builder_->Append();
+  }
+
+  // ListType requires the size argument in the Append() method
+  // in order to be convertible to a ListViewType. ListViewType
+  // requires the size argument in the Append() method always.
+  Status AppendTo(const BaseListType*, int64_t size) {
+    return this->list_builder_->Append(true, size);
+  }
+
   Status ValidateBuilder(const MapType*) {
     if (this->list_builder_->key_builder()->null_count() > 0) {
       return Status::Invalid("Invalid Map: key field cannot contain null values");
@@ -836,11 +851,14 @@ class PyListConverter : public ListConverter {
 
   Status AppendSequence(PyObject* value) {
     int64_t size = static_cast<int64_t>(PySequence_Size(value));
+    RETURN_NOT_OK(AppendTo(this->list_type_, size));
     RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size));
     return this->value_converter_->Extend(value, size);
   }
 
   Status AppendIterable(PyObject* value) {
+    auto size = static_cast<int64_t>(PyObject_Size(value));
+    RETURN_NOT_OK(AppendTo(this->list_type_, size));
     PyObject* iterator = PyObject_GetIter(value);
     OwnedRef iter_ref(iterator);
     while (PyObject* item = PyIter_Next(iterator)) {
@@ -857,6 +875,7 @@ class PyListConverter : public ListConverter {
       return Status::Invalid("Can only convert 1-dimensional array values");
     }
     const int64_t size = PyArray_SIZE(ndarray);
+    RETURN_NOT_OK(AppendTo(this->list_type_, size));
     RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size));
 
     const auto value_type = this->value_converter_->builder()->type();
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index ee3872aa3a242..d7f7895b538e8 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -2742,6 +2742,29 @@ cdef class RecordBatch(_Tabular):
 
         return pyarrow_wrap_batch(c_batch)
 
+    def cast(self, Schema target_schema, safe=None, options=None):
+        """
+        Cast batch values to another schema.
+
+        Parameters
+        ----------
+        target_schema : Schema
+            Schema to cast to, the names and order of fields must match.
+        safe : bool, default True
+            Check for overflows or other unsafe conversions.
+        options : CastOptions, default None
+            Additional checks passed by CastOptions
+
+        Returns
+        -------
+        RecordBatch
+        """
+        # Wrap the more general Table cast implementation
+        tbl = Table.from_batches([self])
+        casted_tbl = tbl.cast(target_schema, safe=safe, options=options)
+        casted_batch, = casted_tbl.to_batches()
+        return casted_batch
+
     def _to_pandas(self, options, **kwargs):
         return Table.from_batches([self])._to_pandas(options, **kwargs)
 
@@ -3145,6 +3168,68 @@ cdef class RecordBatch(_Tabular):
 
         return pyarrow_wrap_batch(c_batch)
 
+    def _export_to_c_device(self, out_ptr, out_schema_ptr=0):
+        """
+        Export to a C ArrowDeviceArray struct, given its pointer.
+
+        If a C ArrowSchema struct pointer is also given, the record batch
+        schema is exported to it at the same time.
+
+        Parameters
+        ----------
+        out_ptr: int
+            The raw pointer to a C ArrowDeviceArray struct.
+        out_schema_ptr: int (optional)
+            The raw pointer to a C ArrowSchema struct.
+
+        Be careful: if you don't pass the ArrowDeviceArray struct to a consumer,
+        array memory will leak. This is a low-level function intended for
+        expert users.
+        """
+        cdef:
+            void* c_ptr = _as_c_pointer(out_ptr)
+            void* c_schema_ptr = _as_c_pointer(out_schema_ptr,
+                                               allow_null=True)
+        with nogil:
+            check_status(ExportDeviceRecordBatch(
+                deref(self.sp_batch), <shared_ptr[CSyncEvent]>NULL,
+                <ArrowDeviceArray*> c_ptr, <ArrowSchema*> c_schema_ptr)
+            )
+
+    @staticmethod
+    def _import_from_c_device(in_ptr, schema):
+        """
+        Import RecordBatch from a C ArrowDeviceArray struct, given its pointer
+        and the imported schema.
+
+        Parameters
+        ----------
+        in_ptr: int
+            The raw pointer to a C ArrowDeviceArray struct.
+        schema: Schema or int
+            Either a Schema object, or the raw pointer to a C ArrowSchema
+            struct.
+
+        This is a low-level function intended for expert users.
+        """
+        cdef:
+            void* c_ptr = _as_c_pointer(in_ptr)
+            void* c_schema_ptr
+            shared_ptr[CRecordBatch] c_batch
+
+        c_schema = pyarrow_unwrap_schema(schema)
+        if c_schema == nullptr:
+            # Not a Schema object, perhaps a raw ArrowSchema pointer
+            c_schema_ptr = _as_c_pointer(schema, allow_null=True)
+            with nogil:
+                c_batch = GetResultValue(ImportDeviceRecordBatch(
+                    <ArrowDeviceArray*> c_ptr, <ArrowSchema*> c_schema_ptr))
+        else:
+            with nogil:
+                c_batch = GetResultValue(ImportDeviceRecordBatch(
+                    <ArrowDeviceArray*> c_ptr, c_schema))
+        return pyarrow_wrap_batch(c_batch)
+
 
 def _reconstruct_record_batch(columns, schema):
     """
@@ -4094,8 +4179,8 @@ cdef class Table(_Tabular):
         Parameters
         ----------
         max_chunksize : int, default None
-            Maximum size for ChunkedArray chunks. Individual chunks may be
-            smaller depending on the chunk layout of individual columns.
+            Maximum number of rows for ChunkedArray chunks. Individual chunks
+            may be smaller depending on the chunk layout of individual columns.
 
         Returns
         -------
@@ -4189,8 +4274,8 @@ cdef class Table(_Tabular):
         Parameters
         ----------
         max_chunksize : int, default None
-            Maximum size for RecordBatch chunks. Individual chunks may be
-            smaller depending on the chunk layout of individual columns.
+            Maximum number of rows for each RecordBatch chunk. Individual chunks
+            may be smaller depending on the chunk layout of individual columns.
 
         Returns
         -------
@@ -4259,8 +4344,8 @@ cdef class Table(_Tabular):
         Parameters
         ----------
         max_chunksize : int, default None
-            Maximum size for RecordBatch chunks. Individual chunks may be
-            smaller depending on the chunk layout of individual columns.
+            Maximum number of rows for each RecordBatch chunk. Individual chunks
+            may be smaller depending on the chunk layout of individual columns.
 
Returns ------- diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index bb88a4dcb7b2a..7affe815a22ba 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -167,7 +167,9 @@ def list_types(item_strategy=primitive_types): pa.list_, item_strategy, st.integers(min_value=0, max_value=16) - ) + ), + st.builds(pa.list_view, item_strategy), + st.builds(pa.large_list_view, item_strategy) ) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index bd9ae214b041e..782c41d0d7015 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -627,7 +627,8 @@ def test_string_binary_from_buffers(): assert copied.null_count == 0 -@pytest.mark.parametrize('list_type_factory', [pa.list_, pa.large_list]) +@pytest.mark.parametrize('list_type_factory', [ + pa.list_, pa.large_list, pa.list_view, pa.large_list_view]) def test_list_from_buffers(list_type_factory): ty = list_type_factory(pa.int16()) array = pa.array([[0, 1, 2], None, [], [3, 4, 5]], type=ty) @@ -637,15 +638,15 @@ def test_list_from_buffers(list_type_factory): with pytest.raises(ValueError): # No children - pa.Array.from_buffers(ty, 4, [None, buffers[1]]) + pa.Array.from_buffers(ty, 4, buffers[:ty.num_buffers]) - child = pa.Array.from_buffers(pa.int16(), 6, buffers[2:]) - copied = pa.Array.from_buffers(ty, 4, buffers[:2], children=[child]) + child = pa.Array.from_buffers(pa.int16(), 6, buffers[ty.num_buffers:]) + copied = pa.Array.from_buffers(ty, 4, buffers[:ty.num_buffers], children=[child]) assert copied.equals(array) with pytest.raises(ValueError): # too many children - pa.Array.from_buffers(ty, 4, [None, buffers[1]], + pa.Array.from_buffers(ty, 4, buffers[:ty.num_buffers], children=[child, child]) @@ -2022,6 +2023,9 @@ def test_cast_identities(ty, values): ([[1, 2], [3]], pa.list_(pa.int64())), ([[4, 5], [6]], pa.large_list(pa.int16())), ([['a'], None, ['b', 'c']], pa.list_(pa.string())), + ([[1, 2], [3]], pa.list_view(pa.int64())), + ([[4, 5], [6]], pa.large_list_view(pa.int16())), + ([['a'], None, ['b', 'c']], pa.list_view(pa.string())), ([(1, 'a'), (2, 'c'), None], pa.struct([pa.field('a', pa.int64()), pa.field('b', pa.string())])) ] @@ -3575,9 +3579,10 @@ def test_run_end_encoded_from_buffers(): 1, offset, children) -@pytest.mark.parametrize(('list_array_type'), - [pa.ListViewArray, pa.LargeListViewArray]) -def test_list_view_from_arrays(list_array_type): +@pytest.mark.parametrize(('list_array_type', 'list_type_factory'), + [(pa.ListViewArray, pa.list_view), + (pa.LargeListViewArray, pa.large_list_view)]) +def test_list_view_from_arrays(list_array_type, list_type_factory): # test in order offsets, similar to ListArray representation values = [1, 2, 3, 4, 5, 6, None, 7] offsets = [0, 2, 4, 6] @@ -3589,6 +3594,17 @@ def test_list_view_from_arrays(list_array_type): assert array.offsets.to_pylist() == offsets assert array.sizes.to_pylist() == sizes + # with specified type + typ = list_type_factory(pa.field("name", pa.int64())) + result = list_array_type.from_arrays(offsets, sizes, values, typ) + assert result.type == typ + assert result.type.value_field.name == "name" + + # with mismatching type + typ = list_type_factory(pa.binary()) + with pytest.raises(TypeError): + list_array_type.from_arrays(offsets, sizes, values, type=typ) + # test out of order offsets with overlapping values values = [1, 2, 3, 4] offsets = [2, 1, 0] @@ -3635,12 +3651,121 @@ def test_list_view_from_arrays(list_array_type): assert 
array.sizes.to_pylist() == sizes -@pytest.mark.parametrize(('list_array_type'), - [pa.ListViewArray, pa.LargeListViewArray]) -def test_list_view_flatten(list_array_type): +@pytest.mark.parametrize(('list_array_type', 'list_type_factory'), + [(pa.ListViewArray, pa.list_view), + (pa.LargeListViewArray, pa.large_list_view)]) +def test_list_view_from_arrays_fails(list_array_type, list_type_factory): + values = [1, 2] + offsets = [0, 1, None] + sizes = [1, 1, 0] + mask = pa.array([False, False, True]) + + # Ambiguous to specify both validity map and offsets or sizes with nulls + with pytest.raises(pa.lib.ArrowInvalid): + list_array_type.from_arrays(offsets, sizes, values, mask=mask) + + offsets = [0, 1, 1] + array = list_array_type.from_arrays(offsets, sizes, values, mask=mask) + array_slice = array[1:] + + # List offsets and sizes must not be slices if a validity map is specified + with pytest.raises(pa.lib.ArrowInvalid): + list_array_type.from_arrays( + array_slice.offsets, array_slice.sizes, + array_slice.values, mask=array_slice.is_null()) + + +@pytest.mark.parametrize(('list_array_type', 'list_type_factory', 'offset_type'), + [(pa.ListViewArray, pa.list_view, pa.int32()), + (pa.LargeListViewArray, pa.large_list_view, pa.int64())]) +def test_list_view_flatten(list_array_type, list_type_factory, offset_type): + arr0 = pa.array([ + 1, None, 2, + 3, 4, + 5, 6, + 7, 8 + ], type=pa.int64()) + + typ1 = list_type_factory(pa.int64()) + arr1 = pa.array([ + [1, None, 2], + None, + [3, 4], + [], + [5, 6], + None, + [7, 8] + ], type=typ1) + offsets1 = pa.array([0, 3, 3, 5, 5, 7, 7], type=offset_type) + sizes1 = pa.array([3, 0, 2, 0, 2, 0, 2], type=offset_type) + + typ2 = list_type_factory( + list_type_factory( + pa.int64() + ) + ) + arr2 = pa.array([ + None, + [ + [1, None, 2], + None, + [3, 4] + ], + [], + [ + [], + [5, 6], + None + ], + [ + [7, 8] + ] + ], type=typ2) + offsets2 = pa.array([0, 0, 3, 3, 6], type=offset_type) + sizes2 = pa.array([0, 3, 0, 3, 1], type=offset_type) + + assert arr1.flatten().equals(arr0) + assert arr1.offsets.equals(offsets1) + assert arr1.sizes.equals(sizes1) + assert arr1.values.equals(arr0) + assert arr2.flatten().equals(arr1) + assert arr2.offsets.equals(offsets2) + assert arr2.sizes.equals(sizes2) + assert arr2.values.equals(arr1) + assert arr2.flatten().flatten().equals(arr0) + assert arr2.values.values.equals(arr0) + + # test out of order offsets values = [1, 2, 3, 4] offsets = [3, 2, 1, 0] sizes = [1, 1, 1, 1] array = list_array_type.from_arrays(offsets, sizes, values) assert array.flatten().to_pylist() == [4, 3, 2, 1] + + # test null elements backed by non-empty sublists + mask = pa.array([False, False, False, True]) + array = list_array_type.from_arrays(offsets, sizes, values, mask=mask) + + assert array.flatten().to_pylist() == [4, 3, 2] + assert array.values.to_pylist() == [1, 2, 3, 4] + + +@pytest.mark.parametrize('list_view_type', [pa.ListViewArray, pa.LargeListViewArray]) +def test_list_view_slice(list_view_type): + # sliced -> values keeps referring to full values buffer, but offsets is + # sliced as well so the offsets correctly point into the full values array + # sliced -> flatten() will return the sliced value array. 
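+    # Concretely: with offsets=[0, 3, 4] and sizes=[3, 1, 2] below, array[1:]
+    # keeps the six-element values buffer while its own offsets become [3, 4].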
+ + array = list_view_type.from_arrays(offsets=[0, 3, 4], sizes=[ + 3, 1, 2], values=[1, 2, 3, 4, 5, 6]) + sliced_array = array[1:] + + assert sliced_array.values.to_pylist() == [1, 2, 3, 4, 5, 6] + assert sliced_array.offsets.to_pylist() == [3, 4] + assert sliced_array.flatten().to_pylist() == [4, 5, 6] + + i = sliced_array.offsets[0].as_py() + j = sliced_array.offsets[1].as_py() + + assert sliced_array[0].as_py() == sliced_array.values[i:j].to_pylist() == [4] diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 3a0c7b5b7152f..f8b2ea15d31ad 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -181,11 +181,10 @@ def test_export_import_field(): pa.Field._import_from_c(ptr_schema) -@needs_cffi -def test_export_import_array(): +def check_export_import_array(array_type, exporter, importer): c_schema = ffi.new("struct ArrowSchema*") ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowArray*") + c_array = ffi.new(f"struct {array_type}*") ptr_array = int(ffi.cast("uintptr_t", c_array)) gc.collect() # Make sure no Arrow data dangles in a ref cycle @@ -195,11 +194,11 @@ def test_export_import_array(): typ = pa.list_(pa.int32()) arr = pa.array([[1], [2, 42]], type=typ) py_value = arr.to_pylist() - arr._export_to_c(ptr_array) + exporter(arr, ptr_array) assert pa.total_allocated_bytes() > old_allocated # Delete recreate C++ object from exported pointer del arr - arr_new = pa.Array._import_from_c(ptr_array, typ) + arr_new = importer(ptr_array, typ) assert arr_new.to_pylist() == py_value assert arr_new.type == pa.list_(pa.int32()) assert pa.total_allocated_bytes() > old_allocated @@ -207,15 +206,15 @@ def test_export_import_array(): assert pa.total_allocated_bytes() == old_allocated # Now released with assert_array_released: - pa.Array._import_from_c(ptr_array, pa.list_(pa.int32())) + importer(ptr_array, pa.list_(pa.int32())) # Type is exported and imported at the same time arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32())) py_value = arr.to_pylist() - arr._export_to_c(ptr_array, ptr_schema) + exporter(arr, ptr_array, ptr_schema) # Delete and recreate C++ objects from exported pointers del arr - arr_new = pa.Array._import_from_c(ptr_array, ptr_schema) + arr_new = importer(ptr_array, ptr_schema) assert arr_new.to_pylist() == py_value assert arr_new.type == pa.list_(pa.int32()) assert pa.total_allocated_bytes() > old_allocated @@ -223,7 +222,35 @@ def test_export_import_array(): assert pa.total_allocated_bytes() == old_allocated # Now released with assert_schema_released: - pa.Array._import_from_c(ptr_array, ptr_schema) + importer(ptr_array, ptr_schema) + + +@needs_cffi +def test_export_import_array(): + check_export_import_array( + "ArrowArray", + pa.Array._export_to_c, + pa.Array._import_from_c, + ) + + +@needs_cffi +def test_export_import_device_array(): + check_export_import_array( + "ArrowDeviceArray", + pa.Array._export_to_c_device, + pa.Array._import_from_c_device, + ) + + # verify exported struct + c_array = ffi.new("struct ArrowDeviceArray*") + ptr_array = int(ffi.cast("uintptr_t", c_array)) + arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32())) + arr._export_to_c_device(ptr_array) + + assert c_array.device_type == 1 # ARROW_DEVICE_CPU 1 + assert c_array.device_id == -1 + assert c_array.array.length == 2 def check_export_import_schema(schema_factory, expected_schema_factory=None): @@ -289,10 +316,10 @@ def test_export_import_schema_float_pointer(): assert schema_new == make_schema() 
-def check_export_import_batch(batch_factory): +def check_export_import_batch(array_type, exporter, importer, batch_factory): c_schema = ffi.new("struct ArrowSchema*") ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowArray*") + c_array = ffi.new(f"struct {array_type}*") ptr_array = int(ffi.cast("uintptr_t", c_array)) gc.collect() # Make sure no Arrow data dangles in a ref cycle @@ -302,11 +329,11 @@ def check_export_import_batch(batch_factory): batch = batch_factory() schema = batch.schema py_value = batch.to_pydict() - batch._export_to_c(ptr_array) + exporter(batch, ptr_array) assert pa.total_allocated_bytes() > old_allocated # Delete and recreate C++ object from exported pointer del batch - batch_new = pa.RecordBatch._import_from_c(ptr_array, schema) + batch_new = importer(ptr_array, schema) assert batch_new.to_pydict() == py_value assert batch_new.schema == schema assert pa.total_allocated_bytes() > old_allocated @@ -314,7 +341,7 @@ def check_export_import_batch(batch_factory): assert pa.total_allocated_bytes() == old_allocated # Now released with assert_array_released: - pa.RecordBatch._import_from_c(ptr_array, make_schema()) + importer(ptr_array, make_schema()) # Type is exported and imported at the same time batch = batch_factory() @@ -322,7 +349,7 @@ def check_export_import_batch(batch_factory): batch._export_to_c(ptr_array, ptr_schema) # Delete and recreate C++ objects from exported pointers del batch - batch_new = pa.RecordBatch._import_from_c(ptr_array, ptr_schema) + batch_new = importer(ptr_array, ptr_schema) assert batch_new.to_pydict() == py_value assert batch_new.schema == batch_factory().schema assert pa.total_allocated_bytes() > old_allocated @@ -330,28 +357,57 @@ def check_export_import_batch(batch_factory): assert pa.total_allocated_bytes() == old_allocated # Now released with assert_schema_released: - pa.RecordBatch._import_from_c(ptr_array, ptr_schema) + importer(ptr_array, ptr_schema) # Not a struct type pa.int32()._export_to_c(ptr_schema) batch_factory()._export_to_c(ptr_array) with pytest.raises(ValueError, match="ArrowSchema describes non-struct type"): - pa.RecordBatch._import_from_c(ptr_array, ptr_schema) + importer(ptr_array, ptr_schema) # Now released with assert_schema_released: - pa.RecordBatch._import_from_c(ptr_array, ptr_schema) + importer(ptr_array, ptr_schema) @needs_cffi def test_export_import_batch(): - check_export_import_batch(make_batch) + check_export_import_batch( + "ArrowArray", + pa.RecordBatch._export_to_c, + pa.RecordBatch._import_from_c, + make_batch, + ) @needs_cffi def test_export_import_batch_with_extension(): with registered_extension_type(ParamExtType(1)): - check_export_import_batch(make_extension_batch) + check_export_import_batch( + "ArrowArray", + pa.RecordBatch._export_to_c, + pa.RecordBatch._import_from_c, + make_extension_batch, + ) + + +@needs_cffi +def test_export_import_device_batch(): + check_export_import_batch( + "ArrowDeviceArray", + pa.RecordBatch._export_to_c_device, + pa.RecordBatch._import_from_c_device, + make_batch, + ) + + # verify exported struct + c_array = ffi.new("struct ArrowDeviceArray*") + ptr_array = int(ffi.cast("uintptr_t", c_array)) + batch = make_batch() + batch._export_to_c_device(ptr_array) + assert c_array.device_type == 1 # ARROW_DEVICE_CPU 1 + assert c_array.device_id == -1 + assert c_array.array.length == 2 def _export_import_batch_reader(ptr_stream, reader_factory): @@ -577,9 +633,8 @@ def test_roundtrip_reader_capsule(constructor): obj = constructor(schema, 
batches) - # TODO: turn this to ValueError once we implement validation. bad_schema = pa.schema({'ints': pa.int32()}) - with pytest.raises(NotImplementedError): + with pytest.raises(pa.lib.ArrowTypeError, match="Field 0 cannot be cast"): obj.__arrow_c_stream__(bad_schema.__arrow_c_schema__()) # Can work with matching schema @@ -591,6 +646,21 @@ def test_roundtrip_reader_capsule(constructor): assert batch.equals(expected) +def test_roundtrip_batch_reader_capsule_requested_schema(): + batch = make_batch() + requested_schema = pa.schema([('ints', pa.list_(pa.int64()))]) + requested_capsule = requested_schema.__arrow_c_schema__() + batch_as_requested = batch.cast(requested_schema) + + capsule = batch.__arrow_c_stream__(requested_capsule) + assert PyCapsule_IsValid(capsule, b"arrow_array_stream") == 1 + imported_reader = pa.RecordBatchReader._import_from_c_capsule(capsule) + assert imported_reader.schema == requested_schema + assert imported_reader.read_next_batch().equals(batch_as_requested) + with pytest.raises(StopIteration): + imported_reader.read_next_batch() + + def test_roundtrip_batch_reader_capsule(): batch = make_batch() diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 4b58dc65bae9b..98cbd920b509b 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -3214,8 +3214,7 @@ def test_list_element(): def test_count_distinct(): - seed = datetime.datetime.now() - samples = [seed.replace(year=y) for y in range(1992, 2092)] + samples = [datetime.datetime(year=y, month=1, day=1) for y in range(1992, 2092)] arr = pa.array(samples, pa.timestamp("ns")) assert pc.count_distinct(arr) == pa.scalar(len(samples), type=pa.int64()) diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 55ea28f50fbb3..b824b89564374 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -252,21 +252,17 @@ def test_nested_lists(seq): assert arr.null_count == 1 assert arr.type == pa.list_(pa.int64()) assert arr.to_pylist() == data - # With explicit type - arr = pa.array(seq(data), type=pa.list_(pa.int32())) - assert len(arr) == 3 - assert arr.null_count == 1 - assert arr.type == pa.list_(pa.int32()) - assert arr.to_pylist() == data @parametrize_with_sequence_types -def test_nested_large_lists(seq): +@pytest.mark.parametrize("factory", [ + pa.list_, pa.large_list, pa.list_view, pa.large_list_view]) +def test_nested_lists_with_explicit_type(seq, factory): data = [[], [1, 2], None] - arr = pa.array(seq(data), type=pa.large_list(pa.int16())) + arr = pa.array(seq(data), type=factory(pa.int16())) assert len(arr) == 3 assert arr.null_count == 1 - assert arr.type == pa.large_list(pa.int16()) + assert arr.type == factory(pa.int16()) assert arr.to_pylist() == data @@ -277,15 +273,22 @@ def test_list_with_non_list(seq): pa.array(seq([[], [1, 2], 3]), type=pa.list_(pa.int64())) with pytest.raises(TypeError): pa.array(seq([[], [1, 2], 3]), type=pa.large_list(pa.int64())) + with pytest.raises(TypeError): + pa.array(seq([[], [1, 2], 3]), type=pa.list_view(pa.int64())) + with pytest.raises(TypeError): + pa.array(seq([[], [1, 2], 3]), type=pa.large_list_view(pa.int64())) @parametrize_with_sequence_types -def test_nested_arrays(seq): +@pytest.mark.parametrize("factory", [ + pa.list_, pa.large_list, pa.list_view, pa.large_list_view]) +def test_nested_arrays(seq, factory): arr = pa.array(seq([np.array([], dtype=np.int64), - np.array([1, 2], 
dtype=np.int64), None])) + np.array([1, 2], dtype=np.int64), None]), + type=factory(pa.int64())) assert len(arr) == 3 assert arr.null_count == 1 - assert arr.type == pa.list_(pa.int64()) + assert arr.type == factory(pa.int64()) assert arr.to_pylist() == [[], [1, 2], None] @@ -1464,9 +1467,18 @@ def test_sequence_duration_nested_lists(): assert arr.type == pa.list_(pa.duration('us')) assert arr.to_pylist() == data - arr = pa.array(data, type=pa.list_(pa.duration('ms'))) + +@pytest.mark.parametrize("factory", [ + pa.list_, pa.large_list, pa.list_view, pa.large_list_view]) +def test_sequence_duration_nested_lists_with_explicit_type(factory): + td1 = datetime.timedelta(1, 1, 1000) + td2 = datetime.timedelta(1, 100) + + data = [[td1, None], [td1, td2]] + + arr = pa.array(data, type=factory(pa.duration('ms'))) assert len(arr) == 2 - assert arr.type == pa.list_(pa.duration('ms')) + assert arr.type == factory(pa.duration('ms')) assert arr.to_pylist() == data @@ -2430,6 +2442,10 @@ def test_array_from_pylist_offset_overflow(): ), ([[1, 2, 3]], [pa.scalar([1, 2, 3])], pa.list_(pa.int64())), ([["a", "b"]], [pa.scalar(["a", "b"])], pa.list_(pa.string())), + ([[1, 2, 3]], [pa.scalar([1, 2, 3], type=pa.list_view(pa.int64()))], + pa.list_view(pa.int64())), + ([["a", "b"]], [pa.scalar(["a", "b"], type=pa.list_view(pa.string()))], + pa.list_view(pa.string())), ( [1, 2, None], [pa.scalar(1, type=pa.int8()), pa.scalar(2, type=pa.int8()), None], diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index a9054f0b174aa..8e20390385885 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -2725,6 +2725,16 @@ def test_open_dataset_from_uri_s3(s3_example_simple, dataset_reader): assert dataset_reader.to_table(dataset).equals(table) +@pytest.mark.parquet +@pytest.mark.s3 +def test_open_dataset_from_fileinfos(s3_example_simple, dataset_reader): + table, path, filesystem, uri, _, _, _, _ = s3_example_simple + selector = fs.FileSelector("mybucket") + finfos = filesystem.get_file_info(selector) + dataset = ds.dataset(finfos, format="parquet", filesystem=filesystem) + assert dataset_reader.to_table(dataset).equals(table) + + @pytest.mark.parquet @pytest.mark.s3 # still needed to create the data def test_open_dataset_from_uri_s3_fsspec(s3_example_simple): diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 6ba5137e4f63e..543c4399ddb47 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -1904,3 +1904,26 @@ def test_s3_finalize_region_resolver(): resolve_s3_region('voltrondata-labs-datasets') """ subprocess.check_call([sys.executable, "-c", code]) + + +@pytest.mark.s3 +def test_concurrent_s3fs_init(): + # GH-39897: lazy concurrent initialization of S3 subsystem should not crash + code = """if 1: + import threading + import pytest + from pyarrow.fs import (FileSystem, S3FileSystem, + ensure_s3_initialized, finalize_s3) + threads = [] + fn = lambda: FileSystem.from_uri('s3://mf-nwp-models/README.txt') + for i in range(4): + thread = threading.Thread(target = fn) + threads.append(thread) + thread.start() + + for thread in threads: + thread.join() + + finalize_s3() + """ + subprocess.check_call([sys.executable, "-c", code]) diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index 407011d90b734..d38f45b5feff4 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -1226,10 +1226,15 @@ def __arrow_c_stream__(self, 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index a9054f0b174aa..8e20390385885 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2725,6 +2725,16 @@ def test_open_dataset_from_uri_s3(s3_example_simple, dataset_reader):
     assert dataset_reader.to_table(dataset).equals(table)
 
 
+@pytest.mark.parquet
+@pytest.mark.s3
+def test_open_dataset_from_fileinfos(s3_example_simple, dataset_reader):
+    table, path, filesystem, uri, _, _, _, _ = s3_example_simple
+    selector = fs.FileSelector("mybucket")
+    finfos = filesystem.get_file_info(selector)
+    dataset = ds.dataset(finfos, format="parquet", filesystem=filesystem)
+    assert dataset_reader.to_table(dataset).equals(table)
+
+
 @pytest.mark.parquet
 @pytest.mark.s3  # still needed to create the data
 def test_open_dataset_from_uri_s3_fsspec(s3_example_simple):
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 6ba5137e4f63e..543c4399ddb47 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1904,3 +1904,26 @@ def test_s3_finalize_region_resolver():
         resolve_s3_region('voltrondata-labs-datasets')
     """
     subprocess.check_call([sys.executable, "-c", code])
+
+
+@pytest.mark.s3
+def test_concurrent_s3fs_init():
+    # GH-39897: lazy concurrent initialization of S3 subsystem should not crash
+    code = """if 1:
+        import threading
+        import pytest
+        from pyarrow.fs import (FileSystem, S3FileSystem,
+                                ensure_s3_initialized, finalize_s3)
+        threads = []
+        fn = lambda: FileSystem.from_uri('s3://mf-nwp-models/README.txt')
+        for i in range(4):
+            thread = threading.Thread(target = fn)
+            threads.append(thread)
+            thread.start()
+
+        for thread in threads:
+            thread.join()
+
+        finalize_s3()
+        """
+    subprocess.check_call([sys.executable, "-c", code])
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 407011d90b734..d38f45b5feff4 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -1226,10 +1226,15 @@ def __arrow_c_stream__(self, requested_schema=None):
     reader = pa.RecordBatchReader.from_stream(wrapper, schema=data[0].schema)
     assert reader.read_all() == expected
 
-    # If schema doesn't match, raises NotImplementedError
-    with pytest.raises(NotImplementedError):
+    # Passing a different but castable schema works
+    good_schema = pa.schema([pa.field("a", pa.int32())])
+    reader = pa.RecordBatchReader.from_stream(wrapper, schema=good_schema)
+    assert reader.read_all() == expected.cast(good_schema)
+
+    # If schema doesn't match, raises TypeError
+    with pytest.raises(pa.lib.ArrowTypeError, match='Field 0 cannot be cast'):
         pa.RecordBatchReader.from_stream(
-            wrapper, schema=pa.schema([pa.field('a', pa.int32())])
+            wrapper, schema=pa.schema([pa.field('a', pa.list_(pa.int32()))])
         )
 
     # Proper type errors for wrong input
@@ -1238,3 +1243,60 @@ def __arrow_c_stream__(self, requested_schema=None):
 
     with pytest.raises(TypeError):
         pa.RecordBatchReader.from_stream(expected, schema=data[0])
+
+
+def test_record_batch_reader_cast():
+    schema_src = pa.schema([pa.field('a', pa.int64())])
+    data = [
+        pa.record_batch([pa.array([1, 2, 3], type=pa.int64())], names=['a']),
+        pa.record_batch([pa.array([4, 5, 6], type=pa.int64())], names=['a']),
+    ]
+    table_src = pa.Table.from_batches(data)
+
+    # Cast to same type should always work
+    reader = pa.RecordBatchReader.from_batches(schema_src, data)
+    assert reader.cast(schema_src).read_all() == table_src
+
+    # Check non-trivial cast
+    schema_dst = pa.schema([pa.field('a', pa.int32())])
+    reader = pa.RecordBatchReader.from_batches(schema_src, data)
+    assert reader.cast(schema_dst).read_all() == table_src.cast(schema_dst)
+
+    # Check error for field name/length mismatch
+    reader = pa.RecordBatchReader.from_batches(schema_src, data)
+    with pytest.raises(ValueError, match="Target schema's field names"):
+        reader.cast(pa.schema([]))
+
+    # Check error for impossible cast in call to .cast()
+    reader = pa.RecordBatchReader.from_batches(schema_src, data)
+    with pytest.raises(pa.lib.ArrowTypeError, match='Field 0 cannot be cast'):
+        reader.cast(pa.schema([pa.field('a', pa.list_(pa.int32()))]))
+
+
+def test_record_batch_reader_cast_nulls():
+    schema_src = pa.schema([pa.field('a', pa.int64())])
+    data_with_nulls = [
+        pa.record_batch([pa.array([1, 2, None], type=pa.int64())], names=['a']),
+    ]
+    data_without_nulls = [
+        pa.record_batch([pa.array([1, 2, 3], type=pa.int64())], names=['a']),
+    ]
+    table_with_nulls = pa.Table.from_batches(data_with_nulls)
+    table_without_nulls = pa.Table.from_batches(data_without_nulls)
+
+    # Cast to nullable destination should work
+    reader = pa.RecordBatchReader.from_batches(schema_src, data_with_nulls)
+    schema_dst = pa.schema([pa.field('a', pa.int32())])
+    assert reader.cast(schema_dst).read_all() == table_with_nulls.cast(schema_dst)
+
+    # Cast to non-nullable destination should work if there are no nulls
+    reader = pa.RecordBatchReader.from_batches(schema_src, data_without_nulls)
+    schema_dst = pa.schema([pa.field('a', pa.int32(), nullable=False)])
+    assert reader.cast(schema_dst).read_all() == table_without_nulls.cast(schema_dst)
+
+    # Cast to non-nullable destination should error if there are nulls
+    # when the batch is pulled
+    reader = pa.RecordBatchReader.from_batches(schema_src, data_with_nulls)
+    casted_reader = reader.cast(schema_dst)
+    with pytest.raises(pa.lib.ArrowInvalid, match="Can't cast array"):
+        casted_reader.read_all()
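As the tests above pin down, `RecordBatchReader.cast` validates the target schema eagerly but applies the cast lazily, so nullability violations only surface once a batch is actually pulled. A sketch of the intended usage, assuming a build with this patch:

import pyarrow as pa

schema = pa.schema([pa.field("a", pa.int64())])
batches = [pa.record_batch([pa.array([1, 2, None])], names=["a"])]

reader = pa.RecordBatchReader.from_batches(schema, batches)
casted = reader.cast(pa.schema([pa.field("a", pa.int32())]))
table = casted.read_all()  # each batch is cast as it streams through

# An impossible cast fails immediately, at .cast() time:
reader = pa.RecordBatchReader.from_batches(schema, batches)
try:
    reader.cast(pa.schema([pa.field("a", pa.list_(pa.int32()))]))
except pa.lib.ArrowTypeError:
    pass  # "Field 0 cannot be cast ..."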
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 074fb757e265a..6a814111898b7 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -57,9 +57,8 @@
     ([1, 2, 3], None, pa.ListScalar),
     ([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar),
     ([1, 2, 3, 4, 5], pa.list_(pa.int8(), 5), pa.FixedSizeListScalar),
-    # TODO GH-39855
-    # ([1, 2, 3], pa.list_view(pa.int8()), pa.ListViewScalar),
-    # ([1, 2, 3, 4], pa.large_list_view(pa.int8()), pa.LargeListViewScalar),
+    ([1, 2, 3], pa.list_view(pa.int8()), pa.ListViewScalar),
+    ([1, 2, 3, 4], pa.large_list_view(pa.int8()), pa.LargeListViewScalar),
     (datetime.date.today(), None, pa.Date32Scalar),
     (datetime.date.today(), pa.date64(), pa.Date64Scalar),
     (datetime.datetime.now(), None, pa.TimestampScalar),
@@ -541,9 +540,8 @@ def test_fixed_size_binary():
 @pytest.mark.parametrize(('ty', 'klass'), [
     (pa.list_(pa.string()), pa.ListScalar),
     (pa.large_list(pa.string()), pa.LargeListScalar),
-    # TODO GH-39855
-    # (pa.list_view(pa.string()), pa.ListViewScalar),
-    # (pa.large_list_view(pa.string()), pa.LargeListViewScalar)
+    (pa.list_view(pa.string()), pa.ListViewScalar),
+    (pa.large_list_view(pa.string()), pa.LargeListViewScalar)
 ])
 def test_list(ty, klass):
     v = ['foo', None]
@@ -565,14 +563,29 @@ def test_list(ty, klass):
         s[2]
 
 
-def test_list_from_numpy():
-    s = pa.scalar(np.array([1, 2, 3], dtype=np.int64()))
-    assert s.type == pa.list_(pa.int64())
+@pytest.mark.parametrize('ty', [
+    pa.list_(pa.int64()),
+    pa.large_list(pa.int64()),
+    pa.list_view(pa.int64()),
+    pa.large_list_view(pa.int64()),
+    None
+])
+def test_list_from_numpy(ty):
+    s = pa.scalar(np.array([1, 2, 3], dtype=np.int64()), type=ty)
+    if ty is None:
+        ty = pa.list_(pa.int64())  # expected inferred type
+    assert s.type == ty
     assert s.as_py() == [1, 2, 3]
 
 
 @pytest.mark.pandas
-def test_list_from_pandas():
+@pytest.mark.parametrize('factory', [
+    pa.list_,
+    pa.large_list,
+    pa.list_view,
+    pa.large_list_view
+])
+def test_list_from_pandas(factory):
     import pandas as pd
 
     s = pa.scalar(pd.Series([1, 2, 3]))
@@ -580,11 +593,11 @@ def test_list_from_pandas(factory):
 
     cases = [
         (np.nan, 'null'),
-        (['string', np.nan], pa.list_(pa.binary())),
-        (['string', np.nan], pa.list_(pa.utf8())),
-        ([b'string', np.nan], pa.list_(pa.binary(6))),
-        ([True, np.nan], pa.list_(pa.bool_())),
-        ([decimal.Decimal('0'), np.nan], pa.list_(pa.decimal128(12, 2))),
+        (['string', np.nan], factory(pa.binary())),
+        (['string', np.nan], factory(pa.utf8())),
+        ([b'string', np.nan], factory(pa.binary(6))),
+        ([True, np.nan], factory(pa.bool_())),
+        ([decimal.Decimal('0'), np.nan], factory(pa.decimal128(12, 2))),
     ]
     for case, ty in cases:
         # Both types of exceptions are raised. May want to clean that up
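With the GH-39855 TODOs resolved above, list-view scalars now come back as their own scalar classes; for example:

import pyarrow as pa

s = pa.scalar([1, 2, 3], type=pa.list_view(pa.int8()))
assert isinstance(s, pa.ListViewScalar)
assert s.as_py() == [1, 2, 3]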
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index d6def54570581..f0fd5518de067 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -635,9 +635,18 @@ def __arrow_c_stream__(self, requested_schema=None):
     result = pa.table(wrapper, schema=data[0].schema)
     assert result == expected
 
+    # Passing a different schema will cast
+    good_schema = pa.schema([pa.field('a', pa.int32())])
+    result = pa.table(wrapper, schema=good_schema)
+    assert result == expected.cast(good_schema)
+
     # If schema doesn't match, raises NotImplementedError
-    with pytest.raises(NotImplementedError):
-        pa.table(wrapper, schema=pa.schema([pa.field('a', pa.int32())]))
+    with pytest.raises(
+        pa.lib.ArrowTypeError, match="Field 0 cannot be cast"
+    ):
+        pa.table(
+            wrapper, schema=pa.schema([pa.field('a', pa.list_(pa.int32()))])
+        )
 
 
 def test_recordbatch_itercolumns():
@@ -2620,6 +2629,25 @@ def test_record_batch_sort():
     assert sorted_rb_dict["c"] == ["foobar", "bar", "foo", "car"]
 
 
+def test_record_batch_cast():
+    rb = pa.RecordBatch.from_arrays([
+        pa.array([None, 1]),
+        pa.array([False, True])
+    ], names=["a", "b"])
+    new_schema = pa.schema([pa.field("a", "int64", nullable=True),
+                            pa.field("b", "bool", nullable=False)])
+
+    assert rb.cast(new_schema).schema == new_schema
+
+    # Casting a nullable field to non-nullable is invalid
+    rb = pa.RecordBatch.from_arrays([
+        pa.array([None, 1]),
+        pa.array([None, True])
+    ], names=["a", "b"])
+    with pytest.raises(ValueError):
+        rb.cast(new_schema)
+
+
 @pytest.mark.parametrize("constructor", [pa.table, pa.record_batch])
 def test_numpy_asarray(constructor):
     table = constructor([[1, 2, 3], [4.0, 5.0, 6.0]], names=["a", "b"])
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index 0add5786088d3..1d132a6af8a4d 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -214,7 +214,10 @@ def test_is_nested_or_struct():
     assert types.is_nested(struct_ex)
 
     assert types.is_nested(pa.list_(pa.int32()))
+    assert types.is_nested(pa.list_(pa.int32(), 3))
     assert types.is_nested(pa.large_list(pa.int32()))
+    assert types.is_nested(pa.list_view(pa.int32()))
+    assert types.is_nested(pa.large_list_view(pa.int32()))
     assert not types.is_nested(pa.int32())
 
 
@@ -942,18 +945,37 @@ def test_type_id():
         assert isinstance(ty.id, int)
 
 
-def test_bit_width():
-    for ty, expected in [(pa.bool_(), 1),
-                         (pa.int8(), 8),
-                         (pa.uint32(), 32),
-                         (pa.float16(), 16),
-                         (pa.decimal128(19, 4), 128),
-                         (pa.decimal256(76, 38), 256),
-                         (pa.binary(42), 42 * 8)]:
-        assert ty.bit_width == expected
-    for ty in [pa.binary(), pa.string(), pa.list_(pa.int16())]:
+def test_bit_and_byte_width():
+    for ty, expected_bit_width, expected_byte_width in [
+        (pa.bool_(), 1, 0),
+        (pa.int8(), 8, 1),
+        (pa.uint32(), 32, 4),
+        (pa.float16(), 16, 2),
+        (pa.timestamp('s'), 64, 8),
+        (pa.date32(), 32, 4),
+        (pa.decimal128(19, 4), 128, 16),
+        (pa.decimal256(76, 38), 256, 32),
+        (pa.binary(42), 42 * 8, 42)
+    ]:
+        assert ty.bit_width == expected_bit_width
+
+        if expected_byte_width == 0:
+            with pytest.raises(ValueError, match="Less than one byte"):
+                ty.byte_width
+        else:
+            assert ty.byte_width == expected_byte_width
+
+    for ty in [
+        pa.binary(),
+        pa.string(),
+        pa.list_(pa.int16()),
+        pa.map_(pa.string(), pa.int32()),
+        pa.struct([('f1', pa.int32())])
+    ]:
         with pytest.raises(ValueError, match="fixed width"):
             ty.bit_width
+        with pytest.raises(ValueError, match="fixed width"):
+            ty.byte_width
 
 
 def test_fixed_size_binary_byte_width():
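For reference, the promoted `byte_width` property mirrors `bit_width` but adds the sub-byte guard exercised above; a sketch based on the doctest and tests in this diff:

import pyarrow as pa

assert pa.int64().byte_width == 8
assert pa.binary(42).byte_width == 42   # fixed-size binary keeps working

try:
    pa.bool_().byte_width               # 1 bit is less than one byte
except ValueError:
    pass                                # "Less than one byte"
try:
    pa.string().byte_width              # variable-width type
except ValueError:
    pass                                # "Non-fixed width type"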
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index e9bf56c6213f6..fbbf36ae9f551 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -257,6 +257,28 @@ cdef class DataType(_Weakrefable):
             raise ValueError("Non-fixed width type")
         return ty.bit_width()
 
+    @property
+    def byte_width(self):
+        """
+        Byte width for fixed width type.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> pa.int64()
+        DataType(int64)
+        >>> pa.int64().byte_width
+        8
+        """
+        cdef _CFixedWidthTypePtr ty
+        ty = dynamic_cast[_CFixedWidthTypePtr](self.type)
+        if ty == nullptr:
+            raise ValueError("Non-fixed width type")
+        byte_width = ty.byte_width()
+        if byte_width == 0:
+            raise ValueError("Less than one byte")
+        return byte_width
+
     @property
     def num_fields(self):
         """
@@ -1342,20 +1364,6 @@ cdef class FixedSizeBinaryType(DataType):
     def __reduce__(self):
         return binary, (self.byte_width,)
 
-    @property
-    def byte_width(self):
-        """
-        The binary size in bytes.
-
-        Examples
-        --------
-        >>> import pyarrow as pa
-        >>> t = pa.binary(3)
-        >>> t.byte_width
-        3
-        """
-        return self.fixed_size_binary_type.byte_width()
-
 
 cdef class Decimal128Type(FixedSizeBinaryType):
     """
diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py
index 0f68ca9fe574b..66b1ec33953a9 100644
--- a/python/pyarrow/types.py
+++ b/python/pyarrow/types.py
@@ -40,8 +40,9 @@
                   lib.Type_DURATION} | _TIME_TYPES | _DATE_TYPES | _INTERVAL_TYPES)
 _UNION_TYPES = {lib.Type_SPARSE_UNION, lib.Type_DENSE_UNION}
-_NESTED_TYPES = {lib.Type_LIST, lib.Type_LARGE_LIST, lib.Type_STRUCT,
-                 lib.Type_MAP} | _UNION_TYPES
+_NESTED_TYPES = {lib.Type_LIST, lib.Type_FIXED_SIZE_LIST, lib.Type_LARGE_LIST,
+                 lib.Type_LIST_VIEW, lib.Type_LARGE_LIST_VIEW,
+                 lib.Type_STRUCT, lib.Type_MAP} | _UNION_TYPES
 
 
 @doc(datatype="null")
diff --git a/r/NEWS.md b/r/NEWS.md
index 58c82c5128b82..06c49c7be006f 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -50,6 +50,17 @@
 * A large number of minor spelling mistakes were fixed (@jsoref, #38929, #38257)
 * The developer documentation has been updated to match changes made in recent releases (#38220)
 
+# arrow 14.0.2.1
+
+## Minor improvements and fixes
+
+* Check for internet access when building from source and fall back to a
+  minimally scoped Arrow C++ build (#39699).
+* Build from source by default on macOS; use `LIBARROW_BINARY=true` for the old behavior (#39861).
+* Support building against older versions of Arrow C++. This is currently opt-in
+  (`ARROW_R_ALLOW_CPP_VERSION_MISMATCH=true`) and requires at least Arrow C++ 13.0.0 (#39739).
+* Make it possible to use Arrow C++ from Rtools on Windows (in future Rtools versions) (#39986).
+
 # arrow 14.0.2
 
 ## Minor improvements and fixes
diff --git a/r/R/duckdb.R b/r/R/duckdb.R
index 9632e9bad1984..a2bf62de2fde2 100644
--- a/r/R/duckdb.R
+++ b/r/R/duckdb.R
@@ -64,7 +64,7 @@ to_duckdb <- function(.data,
   tbl <- dplyr::tbl(con, table_name)
   groups <- dplyr::groups(.data)
   if (length(groups)) {
-    tbl <- dplyr::group_by(tbl, groups)
+    tbl <- dplyr::group_by(tbl, !!!groups)
   }
 
   if (auto_disconnect) {
diff --git a/r/R/flight.R b/r/R/flight.R
index 0bd661e58d565..d151c705c6c1c 100644
--- a/r/R/flight.R
+++ b/r/R/flight.R
@@ -56,7 +56,8 @@ flight_disconnect <- function(client) {
 #' @param overwrite logical: if `path` exists on `client` already, should we
 #'   replace it with the contents of `data`? Default is `TRUE`; if `FALSE` and
 #'   `path` exists, the function will error.
-#' @param max_chunksize integer: Maximum size for RecordBatch chunks when a `data.frame` is sent.
+#' @param max_chunksize integer: Maximum number of rows for RecordBatch chunks
+#'   when a `data.frame` is sent.
 #'   Individual chunks may be smaller depending on the chunk layout of individual columns.
 #' @return `client`, invisibly.
 #' @export
diff --git a/r/man/flight_put.Rd b/r/man/flight_put.Rd
index c306b0f7bb9e0..f14c446d01053 100644
--- a/r/man/flight_put.Rd
+++ b/r/man/flight_put.Rd
@@ -17,7 +17,8 @@ flight_put(client, data, path, overwrite = TRUE, max_chunksize = NULL)
 replace it with the contents of \code{data}? Default is \code{TRUE}; if \code{FALSE} and
 \code{path} exists, the function will error.}
 
-\item{max_chunksize}{integer: Maximum size for RecordBatch chunks when a \code{data.frame} is sent.
+\item{max_chunksize}{integer: Maximum number of rows for RecordBatch chunks
+when a \code{data.frame} is sent.
 Individual chunks may be smaller depending on the chunk layout of individual columns.}
 }
 \value{